#!/usr/bin/python def process(filename, verbose=True): "Process ATOM records from PDB file" atoms = read_pdb(filename) numPhe, numTyr = count(atoms) if verbose: print "%s: %d PHE and %d TYR" % (filename, numPhe, numTyr) return numPhe, numTyr def count(atoms): "Count the number of PHE and TYR residues" seen = set() numPhe = 0 numTyr = 0 for a in atoms: rid = (a[3], a[2]) # residue id = (sequence number, chain id) if rid in seen: continue seen.add(rid) if a[1] == "PHE": numPhe += 1 elif a[1] == "TYR": numTyr += 1 return numPhe, numTyr def read_pdb(filename): "Read ATOM records from PDB file" f = file(filename) atoms = [] for line in f: if not line.startswith("ATOM"): continue # ATOM records are fixed-format, so we cannot use whitespace for parsing if line[16] != ' ' and line[16] != 'A': # Alternate location continue atom = (line[12:16], # Atom name line[17:20].strip(), # Residue type line[21], # Chain id int(line[22:26]), # Sequence number float(line[30:38]), float(line[38:46]), float(line[46:54])) atoms.append(atom) f.close() return atoms if __name__ == "__main__": def count_all(): import os files = os.listdir(".") for filename in files: if filename.endswith(".ent"): process(filename) import cProfile as profile from pstats import Stats profile.run("count_all()", "optimize_v2.stats") s = Stats("optimize_v2.stats") s.sort_stats("cumulative", "time", "calls") s.print_stats(5)