"""generate PSF information from sequence or coordinates.

Particularly useful are the pdbToPSF and seqToPSF routines.
"""

terminalAtoms = ['H5T','H3T']
residueTypes = {}
residueTypes['dna'] = ('CYT', 'GUA', 'ADE', 'THY','C', 'G', 'I', 'INO',
                       'U', 'URA')
residueTypes['prot'] = ('ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
                        'HIS', 'ILE', 'LEU', 'LYS', 'MET',
                        'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL')


def pdbToSeq(pdbRecord):
    """ return a list of list of sequences.
    side effect: set the global variable beginResid"""
    ret = []
    seq=[]
    curSeg=''
    curResid=0
    terminate=0
    seg=''
    resid=0
    lostRes=''  #resid lost if no TER or terminal atoms
    previousTerminate=0 #true if one termination atom has been seen
    beginResid=0
    lines=pdbRecord.split('\n')
    for line in lines:
        if line.startswith('SEQRES'):
            return seqres(lines)
        if line.startswith('TER'): terminate=1
        if line.startswith('ATOM'): 
            res = line[17:21]
            resid = int(line[23:26])
            if not beginResid: beginResid=resid
            seg = line[72:76]
            atom = line[13:16]
            if seq and seg!=curSeg:
                #print "seq change termination"
                terminate=1
                lostRes=res
                pass
            #print resid, curResid
            if seq and resid!=curResid and resid!=curResid+1:
                #print "resid skip termination"
                terminate=1
                lostRes=res
                pass
            if len(seq)>1 and atom in terminalAtoms:
                #print "found terminal atom", len(seq)
                if previousTerminate:
                    terminate=1
                    previousTerminate=0
                    lostRes=res
                else:
                    previousTerminate=1
                    pass
                pass
            if not terminate and resid and resid!=curResid: seq.append(res)
            pass
        if terminate and seq:
            ret.append((beginResid,curSeg,seq))
            previousTerminate=0
            beginResid=0
            seq=[]
            curSeg=''
            if lostRes:
                seq.append(lostRes)
                beginResid=resid
                lostRes=''
                curSeg=seg
                pass
            terminate=0
            curResid=beginResid
        else:
            curResid=resid
            curSeg = seg
            pass
        pass
    if seq: ret.append( (beginResid,curSeg,seq) )
    return ret

def seqres(lines):
    """ read the pdb SEQRES field
    return a list of lists of sequences.
    """
    print "using seqres field"
    records = filter(lambda x:x.startswith('SEQRES'),lines)

    chains={}
    for record in records:
        record = record[:min(len(record),72)]
        id=record[11]
        residues=record[19:].split()
        if chains.has_key(id):
            chains[id] += residues
        else:
            chains[id] = residues
            pass
        pass
    startResid=1
    ids=chains.keys()
    ids.sort()
    ret=[]
    for id in ids:
        ret.append( [startResid,'',chains[id]] )
        startResid += len(chains[id])
        pass

    #get segment names
    segs=[]
    chainCnt=0
    prevChainID=''
    for line in lines:
        if line.startswith('ATOM'): 
            seg = line[72:76]
            chainID=line[21]
            if prevChainID and chainID!= prevChainID:
                chainCnt+=1
                pass
            prevChainID=chainID
            resid = int(line[23:26])
            if not ret[chainCnt][1]:
                ret[chainCnt][1]=seg
                pass
            if chainCnt+1 >= len(ret):
                break

            if resid>=ret[chainCnt+1][0]:
                chainCnt+=1
                pass
            pass
        pass

    return ret
        
    
def deduceSeqType(seq):
    ret='unknown'
    if not seq: return ret
    for type in residueTypes.keys():
        if seq[0].strip() in residueTypes[type]: ret=type
        pass
    if ret=='unknown': return ret
    for res in seq:
        res = res.strip()
        if not res in residueTypes[ret]:
            raise "residue %s not of type %s in %s" % (res,ret,`seq`)
        pass
    return ret

residueMap={}
residueMap['G'] = 'GUA'
residueMap['C'] = 'CYT'
residueMap['T'] = 'THY'
residueMap['A'] = 'ADE'
residueMap['I'] = 'INO'
    
def renameResidues(seq):
    " single letter to three letter names"
    ret = []
    for r in seq:
        r=r.strip()
        if len(r)==1:
            ret.append( residueMap[r] )
        else:
            ret.append(r)
            pass
        pass
    return ret

def cisPeptide(startResid,
                 segName='    '):
    """
    given a startResid, set up topology to make a cis peptide bond between
    residues numbers startResid and startResid+1.

    Call this routine after seq2PSF.

    This function correctly treats bonds involving proline and non-proline
    residues.

    The optional segName argument argument should be specified if there is
    more than one segment in the structure.
    """
    patch="CISP"
    from atomSel import AtomSel
    if len( AtomSel('resid %d and segid "%s"' % (startResid+1,segName)) ):
        patch="CIPP"
        pass
    import xplor
    xplor.command('''
    patch %s
    reference=-=( resid %d and segid "%s")
    reference=+=( resid %d and segid "%s") 
    end
    '''% (patch,startResid,segName,startResid+1,segName))

def dAmino(resid,
           segName='    '):
    """
    change given residue to a D-amino acid.

    Call this routine after seq2PSF.

    The optional segName argument argument should be specified if there is
    more than one segment in the structure.
    """
    import xplor
    xplor.command('''
    patch LtoD 
      reference=nil=( resid %d and segid "%s")
    end''' % (resid,segName))
    return
         

def seqToPSF(seq,
             seqType='auto',
             startResid=1,
             deprotonateHIS=1,
             segName='    ',
             disulfide_bonds=[],
             disulfide_bridges=[],
             amidate_cterm=0,
             ):
    """given a primary protein or nucleic acid sequence, generate PSF info
    and load the appropriate parameters.

    If deprotonateHIS is set, the HD1 atom is deleted from Histidines. This is
    the default.

    if seqType is auto, the type (prot, dna) is determined from the
    sequence. type 'rna' must be explicitly specified.

    didulfide bonds are specified by a list of resid pairs in either
    disulfide_bonds or disulfide_bridges. Use disulfide_bonds for actual bonds
    and disulfide_bridges to remove the cysteine HG proton- for representing
    disulfide bonds by NOE restraints.
    """
    import xplor
    from selectTools import maxResid
    from string import join
    from simulationWorld import SimulationWorld_world as simWorld

    (echo, messages) = xplor.command("set messages none echo off end",
                                     ("prev_echo","prev_messages"))
    

    import os
    if type(seq)==type("string") and os.path.exists(seq):
        seq = file(seq).read()
        pass
            
    if type(seq)==type("string"):
        seq = seq.split()
        pass

    if seqType=='auto': seqType = deduceSeqType(seq)


    #split up seq to avoid overlong line
    seqs=[[]]
    ind=0
    cnt=0
    for r in seq:
        if cnt>20:
            ind+=1
            seqs.append([])
            cnt=0
            pass
        seqs[ind].append( r )
        cnt+=1
        pass
    splitSeq = join(map(lambda x:join(x),seqs),'\n')
    #seq = join(seq)

    import protocol

    if seqType=='prot':
        protocol.initTopology("protein",reset=1)
        protocol.initParams("protein")

        cterm = "CTER"
        if amidate_cterm: cterm = "CTN "

        xplor.command('''
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + r'''
        chain
        LINK PEPP    HEAD - *     TAIL + PRO     END  { LINK to PRO }
        LINK PEPT    HEAD - *     TAIL + *       END
        
        FIRSt PROP                TAIL + PRO     END { nter for PRO }
        FIRSt NTER                TAIL + *       END
        
        LAST  %s   HEAD - *                    END
        

        sequence %s
        end
        end
        end
        ''' % (cterm,splitSeq))

        if deprotonateHIS:
            xplor.command("delete select (name hd1 and resname his) end")
            pass

    elif seqType=='dna':
        protocol.initTopology("nucleic",reset=1)
        protocol.initParams("nucleic")

        xplor.command('''
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + r'''
        chain
        REMARKS  TOPH11.NUC  MACRO for RNA/DNA sequence

        ! this is a macro to define standard DNA/RNA polynucleotide bonds
        ! and termini to generate a sequence.
        ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence
        ! level.
        ! 
        ! Axel Brunger, 17-AUG-84


        LINK NUC  HEAD - *  TAIL + *  END
        
        FIRST  5ter  TAIL + * END

        LAST 3TER  HEAD - * END
        
        ''' + '''
        sequence %s
        end
        end
        end''' % splitSeq)


        for i in range(startResid,maxResid()+1):
            xplor.command("patch DEOX  reference=NIL=( resid %d ) end" % i)
            pass

        # rename some atoms
        xplor.command(r'''
        vector do (name "HN'")  ( NAME H61 AND RESNAME ADE )
        vector do (name "HN''")  ( name H62 and resname ADE )
        vector do (name "HN'")  ( name H21 and resname GUA )
        vector do (name "HN''")  ( name H22 and resname GUA )
        vector do (name "HN'")  ( name H41 and resname CYT )
        vector do (name "HN''")  ( name H42 and resname CYT )
        vector do (name "CM")   ( name C5A and resname THY )
        ''')
        
    elif seqType=='rna':
        protocol.initTopology("nucleic",reset=1)
        protocol.initParams("nucleic")

        xplor.command('''
        segment
        name="%s"
        SETUP=TRUE
        number=%d
        ''' % (segName, startResid) + r'''
        chain
        REMARKS  TOPH11.NUC  MACRO for RNA/DNA sequence

        ! this is a macro to define standard DNA/RNA polynucleotide bonds
        ! and termini to generate a sequence.
        ! it should be added as @TOPTRNA8.NUC in the SEGMent SEQUence
        ! level.
        ! 
        ! Axel Brunger, 17-AUG-84


        LINK NUC  HEAD - *  TAIL + *  END
        
        FIRST  5ter  TAIL + * END

        LAST 3TER  HEAD - * END
        
        ''' + '''
        sequence %s
        end
        end
        end''' % splitSeq)


        # rename some atoms
        xplor.command(r'''
        vector do (name "HN'")  ( NAME H61 AND RESNAME ADE )
        vector do (name "HN''")  ( name H62 and resname ADE )
        vector do (name "HN'")  ( name H21 and resname GUA )
        vector do (name "HN''")  ( name H22 and resname GUA )
        vector do (name "HN'")  ( name H41 and resname CYT )
        vector do (name "HN''")  ( name H42 and resname CYT )
        vector do (name "CM")   ( name C5A and resname THY )
        ''')
        

    else:
        from simulationWorld  import SimulationWorld_world as simWorld
        if simWorld().logLevel()!='none':
            print "seqToPSF: Warning: unsupported sequence type: %s" % seqType
        pass

    for (res1,res2) in disulfide_bonds:
        xplor.command("""
        patch DISU 
          reference=1=( resid %d )  
          reference=2=( resid %d )  
        end""" % (res1,res2))
	pass
    
    for (res1,res2) in disulfide_bridges:
	xplor.command("""
        patch DISN
          reference=1=( resid %d )  
          reference=2=( resid %d )  
        end""" % (res1,res2))

    xplor.command("set messages %s echo %s end" % (messages,echo))
        
    return

def renameAtoms(sel='all'):
    from atomSel import AtomSel
    if type(sel)==type('string'): sel = AtomSel(sel)

    for atom in sel:
        name=atom.atomName()

        if name.find("*")>=0: name = name.replace("*","'")

        atom.setAtomName(name)
        pass
    return


def pdbToPSF(pdbRecord,psfFilename=''):
    """given a PDB record, generate XPLOR PSF structure/topology info.

    The PDB record can be a filename or a string containing PDB contents.
    """
    import xplor
    from selectTools import maxResid

    try:
        import os
        os.stat(pdbRecord)
        pdbRecord = open(pdbRecord).read()
    except:
        pass

    xplor.command("SET ECHO=off mess=off END")
    xplor.command("eval ($saveEcho=$prev_echo)")
    xplor.command("eval ($saveMess=$prev_messages)")
    sequences = pdbToSeq(pdbRecord)

    for (beginResid, segid, seq) in sequences:
        seq = renameResidues(seq)
        if segid=='*': segid=''
        #print type, beginResid, segid, seq
        seqToPSF(seq,seqType='auto',startResid=beginResid,segName=segid)
        pass
    if psfFilename:
        xplor.command("write psf output=%s end" % psfFilename)

    #renameAtoms()
    xplor.command("SET ECHO=$saveEcho mess=$saveMess END")
    return