#!/usr/bin/python def process(filename, verbose=True): "Process ATOM records from PDB file" atoms = read_pdb(filename) numPhe = count(atoms, "PHE") numTyr = count(atoms, "TYR") if verbose: print "%s: %d PHE and %d TYR" % (filename, numPhe, numTyr) return numPhe, numTyr def count(atoms, rtype): "Count the number of residues of the given type" seen = set() num = 0 for a in atoms: rid = (a[3], a[2]) # residue id = (sequence number, chain id) if rid in seen: continue seen.add(rid) if a[1] == rtype: num += 1 return num def read_pdb(filename): "Read ATOM records from PDB file" f = file(filename) atoms = [] for line in f: if not line.startswith("ATOM"): continue # ATOM records are fixed-format, so we cannot use whitespace for parsing if line[16] != ' ' and line[16] != 'A': # Alternate location continue atom = (line[12:16], # Atom name line[17:20].strip(), # Residue type line[21], # Chain id int(line[22:26]), # Sequence number ) # float(line[30:38]), float(line[38:46]), float(line[46:54])) atoms.append(atom) f.close() return atoms if __name__ == "__main__": def count_all(): import os files = os.listdir(".") for filename in files: if filename.endswith(".ent"): process(filename) import cProfile as profile from pstats import Stats profile.run("count_all()", "optimize_v3.stats") s = Stats("optimize_v3.stats") s.sort_stats("cumulative", "time", "calls") s.print_stats(5)