"""generate PSF information from sequence or coordinates. Particularly useful are the pdbToPSF and seqToPSF routines. """ terminalAtoms = ['H5T','H3T'] residueTypes = {} residueTypes['dna'] = ('CYT', 'GUA', 'ADE', 'THY','C', 'G', 'I', 'INO', 'U', 'URA') residueTypes['prot'] = ('ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL') def pdbToSeq(pdbRecord): """ return a list of list of sequences. side effect: set the global variable beginResid""" ret = [] seq=[] curSeg='' curResid=0 terminate=0 seg='' resid=0 lostRes='' #resid lost if no TER or terminal atoms previousTerminate=0 #true if one termination atom has been seen beginResid=0 lines=pdbRecord.split('\n') for line in lines: if line.startswith('SEQRES'): return seqres(lines) if line.startswith('TER'): terminate=1 if line.startswith('ATOM'): res = line[17:21] resid = int(line[23:26]) if not beginResid: beginResid=resid seg = line[72:76] atom = line[13:16] if seq and seg!=curSeg: #print "seq change termination" terminate=1 lostRes=res pass #print resid, curResid if seq and resid!=curResid and resid!=curResid+1: #print "resid skip termination" terminate=1 lostRes=res pass if len(seq)>1 and atom in terminalAtoms: #print "found terminal atom", len(seq) if previousTerminate: terminate=1 previousTerminate=0 lostRes=res else: previousTerminate=1 pass pass if not terminate and resid and resid!=curResid: seq.append(res) pass if terminate and seq: ret.append((beginResid,curSeg,seq)) previousTerminate=0 beginResid=0 seq=[] curSeg='' if lostRes: seq.append(lostRes) beginResid=resid lostRes='' curSeg=seg pass terminate=0 curResid=beginResid else: curResid=resid curSeg = seg pass pass if seq: ret.append( (beginResid,curSeg,seq) ) return ret def seqres(lines): """ read the pdb SEQRES field return a list of lists of sequences. """ print "using seqres field" records = filter(lambda x:x.startswith('SEQRES'),lines) chains={} for record in records: record = record[:min(len(record),72)] id=record[11] residues=record[19:].split() if chains.has_key(id): chains[id] += residues else: chains[id] = residues pass pass startResid=1 ids=chains.keys() ids.sort() ret=[] for id in ids: ret.append( [startResid,'',chains[id]] ) startResid += len(chains[id]) pass #get segment names segs=[] chainCnt=0 prevChainID='' for line in lines: if line.startswith('ATOM'): seg = line[72:76] chainID=line[21] if prevChainID and chainID!= prevChainID: chainCnt+=1 pass prevChainID=chainID resid = int(line[23:26]) if not ret[chainCnt][1]: ret[chainCnt][1]=seg pass if chainCnt+1 >= len(ret): break if resid>=ret[chainCnt+1][0]: chainCnt+=1 pass pass pass return ret def deduceSeqType(seq): ret='unknown' if not seq: return ret for type in residueTypes.keys(): if seq[0].strip() in residueTypes[type]: ret=type pass if ret=='unknown': return ret for res in seq: res = res.strip() if not res in residueTypes[ret]: raise "residue %s not of type %s in %s" % (res,ret,`seq`) pass return ret residueMap={} residueMap['G'] = 'GUA' residueMap['C'] = 'CYT' residueMap['T'] = 'THY' residueMap['A'] = 'ADE' residueMap['I'] = 'INO' def renameResidues(seq): " single letter to three letter names" ret = [] for r in seq: r=r.strip() if len(r)==1: ret.append( residueMap[r] ) else: ret.append(r) pass pass return ret def cisPeptide(startResid, segName=' '): """ given a startResid, set up topology to make a cis peptide bond between residues numbers startResid and startResid+1. Call this routine after seq2PSF. This function correctly treats bonds involving proline and non-proline residues. The optional segName argument argument should be specified if there is more than one segment in the structure. """ patch="CISP" from atomSel import AtomSel if len( AtomSel('resid %d and segid "%s"' % (startResid+1,segName)) ): patch="CIPP" pass import xplor xplor.command(''' patch %s reference=-=( resid %d and segid "%s") reference=+=( resid %d and segid "%s") end '''% (patch,startResid,segName,startResid+1,segName)) def dAmino(resid, segName=' '): """ change given residue to a D-amino acid. Call this routine after seq2PSF. The optional segName argument argument should be specified if there is more than one segment in the structure. """ import xplor xplor.command(''' patch LtoD reference=nil=( resid %d and segid "%s") end''' % (resid,segName)) return def seqToPSF(seq, seqType='prot', startResid=1, deprotonateHIS=1, segName=' '): """given a primary protein or nucleic acid sequence, generate PSF info and load the appropriate parameters. If deprotonateHIS is set, the HD1 atom is deleted from Histidines. This is the default. """ import xplor from selectTools import maxResid from string import join from simulationWorld import SimulationWorld_world as simWorld (echo, messages) = xplor.command("set messages none echo off end", ("prev_echo","prev_messages")) import os if type(seq)==type("string") and os.path.exists(seq): seq = file(seq).read() pass if type(seq)==type("string"): seq = seq.split() pass #split up seq to avoid overlong line seqs=[[]] ind=0 cnt=0 for r in seq: if cnt>20: ind+=1 seqs.append([]) cnt=0 pass seqs[ind].append( r ) cnt+=1 pass seq = join(map(lambda x:join(x),seqs),'\n') #seq = join(seq) xplor.command("rtf reset end") if seqType=='prot': xplor.command("rtf @TOPPAR:protein.top end") xplor.command(""" evaluate ($kbbang = 500.0) evaluate ($kbbimp = 500.0) parameter reset @TOPPAR:protein.par end """) xplor.command(''' segment name="%s" SETUP=TRUE number=%d ''' % (segName, startResid) + r''' chain LINK PEPP HEAD - * TAIL + PRO END { LINK to PRO } LINK PEPT HEAD - * TAIL + * END FIRSt PROP TAIL + PRO END { nter for PRO } FIRSt NTER TAIL + * END LAST CTER HEAD - * END sequence %s end end end ''' % seq) if deprotonateHIS: xplor.command("delete select (name hd1 and resname his) end") pass elif seqType=='dna': xplor.command("rtf @TOPPAR:topnah1er1_mod_new.inp end") xplor.command(""" evaluate ($kbbang = 500.0) evaluate ($kbbimp = 500.0) parameter reset @TOPPAR:parnah1er1_mod_new.inp end """) xplor.command(''' segment name="%s" SETUP=TRUE number=%d ''' % (segName, startResid) + r''' chain REMARKS TOPH11.NUC MACRO for RNA/DNA sequence ! this is a macro to define standard DNA/RNA polynucleotide bonds ! and termini to generate a sequence. ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence ! level. ! ! Axel Brunger, 17-AUG-84 LINK NUC HEAD - * TAIL + * END FIRST 5ter TAIL + * END LAST 3TER HEAD - * END ''' + ''' sequence %s end end end''' % seq) for i in range(startResid,maxResid()+1): xplor.command("patch DEOX reference=NIL=( resid %d ) end" % i) pass # rename some atoms xplor.command(r''' vector do (name "HN'") ( NAME H61 AND RESNAME ADE ) vector do (name "HN''") ( name H62 and resname ADE ) vector do (name "HN'") ( name H21 and resname GUA ) vector do (name "HN''") ( name H22 and resname GUA ) vector do (name "HN'") ( name H41 and resname CYT ) vector do (name "HN''") ( name H42 and resname CYT ) vector do (name "CM") ( name C5A and resname THY ) ''') else: from simulationWorld import SimulationWorld_world as simWorld if simWorld().logLevel()!='none': print "seqToPSF: Warning: unsupported sequence type: %s" % seqType pass xplor.command("set messages %s echo %s end" % (messages,echo)) return def renameAtoms(sel='all'): from atomSel import AtomSel if type(sel)==type('string'): sel = AtomSel(sel) for atom in sel: name=atom.atomName() if name.find("*")>=0: name = name.replace("*","'") atom.setAtomName(name) pass return def pdbToPSF(pdbRecord,psfFilename=''): """given a PDB record, generate XPLOR PSF structure/topology info. The PDB record can be a filename or a string containing PDB contents. """ import xplor from selectTools import maxResid try: import os os.stat(pdbRecord) pdbRecord = open(pdbRecord).read() except: pass xplor.command("SET ECHO=off mess=off END") xplor.command("eval ($saveEcho=$prev_echo)") xplor.command("eval ($saveMess=$prev_messages)") sequences = pdbToSeq(pdbRecord) for (beginResid, segid, seq) in sequences: seq = renameResidues(seq) type = deduceSeqType(seq) if segid=='*': segid='' #print type, beginResid, segid, seq seqToPSF(seq,seqType=type,startResid=beginResid,segName=segid) pass if psfFilename: xplor.command("write psf output=%s end" % psfFilename) #renameAtoms() xplor.command("SET ECHO=$saveEcho mess=$saveMess END") return