"""generate PSF information from sequence or coordinates. Particularly useful are the pdbToPSF and seqToPSF routines. """ terminalAtoms = ['H5T','H3T'] residueTypes = {} residueTypes['dna'] = ('CYT', 'GUA', 'ADE', 'THY','C', 'G', 'I', 'INO', 'U', 'URA') residueTypes['prot'] = ('ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL') def pdbToSeq(pdbRecord): """ return a list of list of sequences. side effect: set the global variable beginResid""" ret = [] seq=[] curSeg='' curResid=0 terminate=0 seg='' resid=0 lostRes='' #resid lost if no TER or terminal atoms previousTerminate=0 #true if one termination atom has been seen beginResid=0 lines=pdbRecord.split('\n') for line in lines: if line.startswith('SEQRES'): return seqres(lines) if line.startswith('TER'): terminate=1 if line.startswith('ATOM'): res = line[17:21] resid = int(line[23:26]) if not beginResid: beginResid=resid seg = line[72:76] atom = line[13:16] if seq and seg!=curSeg: #print "seq change termination" terminate=1 lostRes=res pass #print resid, curResid if seq and resid!=curResid and resid!=curResid+1: #print "resid skip termination" terminate=1 lostRes=res pass if len(seq)>1 and atom in terminalAtoms: #print "found terminal atom", len(seq) if previousTerminate: terminate=1 previousTerminate=0 lostRes=res else: previousTerminate=1 pass pass if not terminate and resid and resid!=curResid: seq.append(res) pass if terminate and seq: ret.append((beginResid,curSeg,seq)) previousTerminate=0 beginResid=0 seq=[] curSeg='' if lostRes: seq.append(lostRes) beginResid=resid lostRes='' curSeg=seg pass terminate=0 curResid=beginResid else: curResid=resid curSeg = seg pass pass if seq: ret.append( (beginResid,curSeg,seq) ) return ret def seqres(lines): """ read the pdb SEQRES field return a list of lists of sequences. """ print "using seqres field" records = filter(lambda x:x.startswith('SEQRES'),lines) chains={} for record in records: record = record[:min(len(record),72)] id=record[11] residues=record[19:].split() if chains.has_key(id): chains[id] += residues else: chains[id] = residues pass pass startResid=1 ids=chains.keys() ids.sort() ret=[] for id in ids: ret.append( [startResid,'',chains[id]] ) startResid += len(chains[id]) pass #get segment names segs=[] chainCnt=0 prevChainID='' for line in lines: if line.startswith('ATOM'): seg = line[72:76] chainID=line[21] if prevChainID and chainID!= prevChainID: chainCnt+=1 pass prevChainID=chainID resid = int(line[23:26]) if not ret[chainCnt][1]: ret[chainCnt][1]=seg pass if chainCnt+1 >= len(ret): break if resid>=ret[chainCnt+1][0]: chainCnt+=1 pass pass pass return ret def deduceSeqType(seq): ret='unknown' if not seq: return ret for type in residueTypes.keys(): if seq[0].strip() in residueTypes[type]: ret=type pass if ret=='unknown': return ret for res in seq: res = res.strip() if not res in residueTypes[ret]: raise "residue %s not of type %s in %s" % (res,ret,`seq`) pass return ret residueMap={} residueMap['G'] = 'GUA' residueMap['C'] = 'CYT' residueMap['T'] = 'THY' residueMap['A'] = 'ADE' residueMap['I'] = 'INO' def renameResidues(seq): " single letter to three letter names" ret = [] for r in seq: r=r.strip() if len(r)==1: ret.append( residueMap[r] ) else: ret.append(r) pass pass return ret def cisPeptide(startResid, segName=' '): """ given a startResid, set up topology to make a cis peptide bond between residues numbers startResid and startResid+1. Call this routine after seq2PSF. This function correctly treats bonds involving proline and non-proline residues. The optional segName argument argument should be specified if there is more than one segment in the structure. """ patch="CISP" from atomSel import AtomSel if len( AtomSel('resid %d and segid "%s"' % (startResid+1,segName)) ): patch="CIPP" pass import xplor xplor.command(''' patch %s reference=-=( resid %d and segid "%s") reference=+=( resid %d and segid "%s") end '''% (patch,startResid,segName,startResid+1,segName)) def dAmino(resid, segName=' '): """ change given residue to a D-amino acid. Call this routine after seq2PSF. The optional segName argument argument should be specified if there is more than one segment in the structure. """ import xplor xplor.command(''' patch LtoD reference=nil=( resid %d and segid "%s") end''' % (resid,segName)) return def seqToPSF(seq, seqType='auto', startResid=1, deprotonateHIS=1, segName=' ', disulfide_bonds=[], disulfide_bridges=[], amidate_cterm=0, ): """given a primary protein or nucleic acid sequence, generate PSF info and load the appropriate parameters. If deprotonateHIS is set, the HD1 atom is deleted from Histidines. This is the default. if seqType is auto, the type (prot, dna) is determined from the sequence. type 'rna' must be explicitly specified. didulfide bonds are specified by a list of resid pairs in either disulfide_bonds or disulfide_bridges. Use disulfide_bonds for actual bonds and disulfide_bridges to remove the cysteine HG proton- for representing disulfide bonds by NOE restraints. """ import xplor from selectTools import maxResid from string import join from simulationWorld import SimulationWorld_world as simWorld (echo, messages) = xplor.command("set messages none echo off end", ("prev_echo","prev_messages")) import os if type(seq)==type("string") and os.path.exists(seq): seq = file(seq).read() pass if type(seq)==type("string"): seq = seq.split() pass if seqType=='auto': seqType = deduceSeqType(seq) #split up seq to avoid overlong line seqs=[[]] ind=0 cnt=0 for r in seq: if cnt>20: ind+=1 seqs.append([]) cnt=0 pass seqs[ind].append( r ) cnt+=1 pass splitSeq = join(map(lambda x:join(x),seqs),'\n') #seq = join(seq) import protocol if seqType=='prot': protocol.initTopology("protein",reset=1) protocol.initParams("protein") cterm = "CTER" if amidate_cterm: cterm = "CTN " xplor.command(''' segment name="%s" SETUP=TRUE number=%d ''' % (segName, startResid) + r''' chain LINK PEPP HEAD - * TAIL + PRO END { LINK to PRO } LINK PEPT HEAD - * TAIL + * END FIRSt PROP TAIL + PRO END { nter for PRO } FIRSt NTER TAIL + * END LAST %s HEAD - * END sequence %s end end end ''' % (cterm,splitSeq)) if deprotonateHIS: xplor.command("delete select (name hd1 and resname his) end") pass elif seqType=='dna': protocol.initTopology("nucleic",reset=1) protocol.initParams("nucleic") xplor.command(''' segment name="%s" SETUP=TRUE number=%d ''' % (segName, startResid) + r''' chain REMARKS TOPH11.NUC MACRO for RNA/DNA sequence ! this is a macro to define standard DNA/RNA polynucleotide bonds ! and termini to generate a sequence. ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence ! level. ! ! Axel Brunger, 17-AUG-84 LINK NUC HEAD - * TAIL + * END FIRST 5ter TAIL + * END LAST 3TER HEAD - * END ''' + ''' sequence %s end end end''' % splitSeq) for i in range(startResid,maxResid()+1): xplor.command("patch DEOX reference=NIL=( resid %d ) end" % i) pass # rename some atoms xplor.command(r''' vector do (name "HN'") ( NAME H61 AND RESNAME ADE ) vector do (name "HN''") ( name H62 and resname ADE ) vector do (name "HN'") ( name H21 and resname GUA ) vector do (name "HN''") ( name H22 and resname GUA ) vector do (name "HN'") ( name H41 and resname CYT ) vector do (name "HN''") ( name H42 and resname CYT ) vector do (name "CM") ( name C5A and resname THY ) ''') elif seqType=='rna': protocol.initTopology("nucleic",reset=1) protocol.initParams("nucleic") xplor.command(''' segment name="%s" SETUP=TRUE number=%d ''' % (segName, startResid) + r''' chain REMARKS TOPH11.NUC MACRO for RNA/DNA sequence ! this is a macro to define standard DNA/RNA polynucleotide bonds ! and termini to generate a sequence. ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence ! level. ! ! Axel Brunger, 17-AUG-84 LINK NUC HEAD - * TAIL + * END FIRST 5ter TAIL + * END LAST 3TER HEAD - * END ''' + ''' sequence %s end end end''' % splitSeq) # rename some atoms xplor.command(r''' vector do (name "HN'") ( NAME H61 AND RESNAME ADE ) vector do (name "HN''") ( name H62 and resname ADE ) vector do (name "HN'") ( name H21 and resname GUA ) vector do (name "HN''") ( name H22 and resname GUA ) vector do (name "HN'") ( name H41 and resname CYT ) vector do (name "HN''") ( name H42 and resname CYT ) vector do (name "CM") ( name C5A and resname THY ) ''') else: from simulationWorld import SimulationWorld_world as simWorld if simWorld().logLevel()!='none': print "seqToPSF: Warning: unsupported sequence type: %s" % seqType pass for (res1,res2) in disulfide_bonds: xplor.command(""" patch DISU reference=1=( resid %d ) reference=2=( resid %d ) end""" % (res1,res2)) pass for (res1,res2) in disulfide_bridges: xplor.command(""" patch DISN reference=1=( resid %d ) reference=2=( resid %d ) end""" % (res1,res2)) xplor.command("set messages %s echo %s end" % (messages,echo)) return def renameAtoms(sel='all'): from atomSel import AtomSel if type(sel)==type('string'): sel = AtomSel(sel) for atom in sel: name=atom.atomName() if name.find("*")>=0: name = name.replace("*","'") atom.setAtomName(name) pass return def pdbToPSF(pdbRecord,psfFilename=''): """given a PDB record, generate XPLOR PSF structure/topology info. The PDB record can be a filename or a string containing PDB contents. """ import xplor from selectTools import maxResid try: import os os.stat(pdbRecord) pdbRecord = open(pdbRecord).read() except: pass xplor.command("SET ECHO=off mess=off END") xplor.command("eval ($saveEcho=$prev_echo)") xplor.command("eval ($saveMess=$prev_messages)") sequences = pdbToSeq(pdbRecord) for (beginResid, segid, seq) in sequences: seq = renameResidues(seq) if segid=='*': segid='' #print type, beginResid, segid, seq seqToPSF(seq,seqType='auto',startResid=beginResid,segName=segid) pass if psfFilename: xplor.command("write psf output=%s end" % psfFilename) #renameAtoms() xplor.command("SET ECHO=$saveEcho mess=$saveMess END") return