PROGRAM DIRECTED ! READS THE DIRECTED NETWORK FILE, ENUMERATES ALL CYCLES, !! htat are closed with one link from netc.dat IMPLICIT NONE INTEGER :: np,nb,kb,ip,jb,dq,dqm,kbm,j1,j2,nb0,clmax,pst,qq1,qq2,qq3,qq4 INTEGER :: k,p1,p2,key,np1,ir,id,jr,jd,kr,kd,it,jp,ipm,ncl,kp,mp,key2,nrec INTEGER :: k1,k2,p11,p22,ka,pa,ns,w,score,cliq,kb1,kb2,icl,kcl,mcq,kdm,iwarm INTEGER :: cliqsize,cliqnum,nclust,overlap,vmax,cliqmin,jtemp,pn,dn,dmax INTEGER :: juc,jc,dinm, doutm,ky,ncyc,jc4,mult,nbad,nbadn,NBEL,KC,J,JL,LF INTEGER :: LB,IG,NG,KEY1,JLMAX,MODE INTEGER, ALLOCATABLE :: prot(:),nnd(:),d(:),i1(:),i2(:),ncq(:),elim(:,:) INTEGER, ALLOCATABLE ::neighd(:,:),cq(:,:,:),temp(:),clust(:,:),cs(:),dh(:) INTEGER, ALLOCATABLE :: dm(:),dm2(:),clusttemp(:,:),cstemp(:),sp(:),sd(:) INTEGER, ALLOCATABLE :: ccn(:),neighu(:,:),nnu(:),din(:),dout(:) INTEGER, ALLOCATABLE :: mul(:),UP(:),EL(:),LN(:),L(:,:),PL(:) INTEGER, ALLOCATABLE :: PG1(:),PG2(:),PD(:),w1(:),w2(:) !!! FOR CYCLES INTEGER, ALLOCATABLE :: PW(:,:),ACT(:),CY(:,:,:),CYN(:) INTEGER :: NW,NAW,NWN,LV,KV,PT,IW,BR,LM,LVN,CYNM,IV,Q1,Q2,nwmax,lvmax INTEGER :: NWC,ccc,wb,wc1,wc2 REAL, ALLOCATABLE :: SCP(:),SL(:) REAL :: davc,dav,cd,ccd,a1,a2,ST,SG,DMU double precision ran3,te,prob ! setting the parameters LF=50 ! NUMBER OF LAYERS TO BE MAPPED NBEL=1 ! NUMBER OF LINKS TO BE ELIMINATED nrec=0 ! number of reconnecions of each bond in trunk reconnection MODE=1 ! MODE=1 -- Elimination of cycles by counting, ! Mode=0 -- importing ordered file LM=30 ! MAXIMUM LENTG OF A CYCLE CYNM=100000 ! MAXIMUM NUMBER OF CYCLES ! for Rn generator open(40,file='rand.dat') read(40,*)iwarm close (40) ! READING THE PAIRS OF INTERACTIONS Open (10, file='netn.dat',status='old') READ(10,*) nb nb=nb*2 ! for allocation if np>nb ALLOCATE(prot(nb),d(nb),i1(nb),i2(nb),dm(nb),din(nb),dout(nb),dh(nb)) ALLOCATE(mul(nb),UP(nb),PD(NB),EL(NB),L(LF,NB),LN(LF),PL(NB),SL(LF)) nb=nb/2 ccn=0 i1=0 i2=0 jb=0 ! running number of an edge np=0 ! total current number of proteins kdm=0 ! number of dimers din=0 dout=0 dinm=1 doutm=1 ! Purifying: throwing out identical or TRANSPOSED PAIRS do kb=1,nb read (10,*) p1, p2 if(p1.eq.p2) then ! storing dimers ip=1 do while (p1.ne.dm(ip).and.ip.le.kdm) ip=ip+1 end do if(ip.gt.kdm) then kdm=kdm+1 dm(kdm)=p1 end if else if(p1.NE.p2) THEN ! no dimers key=1 do ip=1,jb if(p1.eq.i1(ip).and.p2.eq.i2(ip)) key=0 end do if(key.eq.1) then ! adding new bond jb=jb+1 i1(jb)=p1 i2(jb)=p2 end if end if end do NB=JB ! TOTAL NUMBER OF DISTINCT EDGES WITHOUT DIMERS NB0=NB close(10) IF(nrec.gt.0) CALL trunk(nb,i1,i2,nrec,iwarm) ! FOR TRUNK RECONNECTION open(40,file='rand.dat') write(40,*) -nint(1.e6*ran3(iwarm)) close (40) do kb=1,nb ! RECOGNIZING LINKED PROTEINS p1=i1(kb) p2=i2(kb) key=0 ! first vertex of the edge do ip=1,np if (p1.eq.prot(ip)) then ! the protein on the list key=1 d(ip)=d(ip)+1 dout(ip)=dout(ip)+1 if(dout(ip).gt.dout(doutm)) doutm=ip j1=ip end if end do if(key.eq.0) then ! no protein on the list np=np+1 d(np)=1 dout(np)=1 prot(np)=p1 j1=np end if key=0 ! second vertex of the edge do ip=1,np if (p2.eq.prot(ip)) then ! the protein on the list key=1 d(ip)=d(ip)+1 din(ip)=din(ip)+1 if(din(ip).gt.din(dinm)) dinm=ip j2=ip end if end do if(key.eq.0) then ! no protein on the list np=np+1 d(np)=1 din(np)=1 prot(np)=p2 j2=np end if i1(kb)=j1 i2(kb)=j2 end do ALLOCATE(dm2(np),sp(np),sd(np)) dm2=0 do ip=1,kdm do j1=1,np !recognizing dimers if(prot(j1).eq.dm(ip)) dm2(j1)=1 ! j1 has a dimer end do end do ! finding the nearest neighbours of each protein ALLOCATE(neighd(np,np),nnd(np)) ALLOCATE(neighu(np,np),nnu(np)) nnd=0 nnu=0 do kb=1,nb p1=i1(kb) p2=i2(kb) nnd(p1)=nnd(p1)+1 nnu(p2)=nnu(p2)+1 neighd(p1,nnd(p1))=p2 neighu(p2,nnu(p2))=p1 end do ns=0 write(*,*) 'end of sorting', ' nlinks',nb,'nprot',np,'ndim',kdm !ENDOFSORTING write(*,*) 'max. out degree', dout(doutm), prot(doutm) write(*,*) 'max. in degree', din(dinm), prot(dinm) cd=0.d0 ccd=0.d0 dav=float(nb)/np a1=0.d0 do ip=1,np a1=a1+din(ip)*din(ip) a2=a2+dout(ip)*dout(ip) cd=cd+(din(ip)-dav)*(dout(ip)-dav) ccd=ccd+((din(ip)-dav)*(dout(ip)-dav))**2 end do a1=sqrt(a1-dav*np)/np a2=sqrt(a2-dav*np)/np write(*,*)'correlation',cd/np,sqrt(ccd - cd*cd/np)/np write(*,*)dav, a1, a2 open(unit=20, file='degree_in.dat',status='unknown') !!!!!!! DEGREE HISTOGRAM!!!!! dmax=0 dh=0 DO ip=1,np dh(din(ip))=dh(din(ip))+1 if(din(ip).ge.dmax) then dmax=din(ip) end if END DO do k=1,dmax if(dh(k).ge.1) write(20,*) k, float(dh(k)) end do write(20,109) 109 format('&') k=1 do while (k.lt.dmax.and.dh(k).ge.1) write(20,*) k, float(dh(k))/np k=k+1 end do kp=k-1 k=1 do while (kp+k.le.dmax) do while (dh(kp+k).eq.0) k=k+1 end do KP=KP+K write(20,*) kp, float(dh(kp))/np/k end do K=1 close(20) open(unit=20, file='degree_out.dat',status='unknown') dmax=0 dh=0 DO ip=1,np dh(dout(ip))=dh(dout(ip))+1 if(dout(ip).ge.dmax) then dmax=dout(ip) end if END DO do k=1,dmax if(dh(k).ge.1) write(20,*) k, float(dh(k)) end do write(20,109) k=1 do while (k.lt.dmax.and.dh(k).ge.1) write(20,*) k, float(dh(k))/np k=k+1 end do kp=k-1 k=1 do while (kp+k.le.dmax) do while (dh(kp+k).eq.0) k=k+1 end do KP=KP+K write(20,*) kp, float(dh(kp))/np/k end do K=1 close(20) open(unit=20, file='degree.dat',status='unknown') dmax=0 dh=0 DO ip=1,np dh(d(ip))=dh(d(ip))+1 if(d(ip).ge.dmax) then dmax=d(ip) end if END DO do k=1,dmax if(dh(k).ge.1) write(20,*) k, float(dh(k)) end do write(20,109) k=1 do while (k.lt.dmax.and.dh(k).ge.1) write(20,*) k, float(dh(k))/np k=k+1 end do kp=k-1 k=1 do while (kp+k.le.dmax) do while (dh(kp+k).eq.0) k=k+1 end do KP=KP+K write(20,*) kp, float(dh(kp))/np/k end do K=1 close(20) !!$!!!!!!!! read another network !!$ open(unit=10, file='netc.dat') !!$ read(10,*) wb !!$ ALLOCATE(w1(wb),w2(wb),dw(wb)) !!$ kb=0 !!$ do kb=1,wb !!$ read (10,*) p1, p2 !!$ if(p1.eq.p2) go to 120 ! no dimers !!$ key=0 !!$ do ip=1,np !!$ if(p1.eq.prot(ip)) then !!$ key=key+1 !!$ wp1=ip !!$ else if(p2.eq.prot(ip)) then !!$ key=key+1 !!$ wp2=ip !!$ end if !!$ if (key.eq.2) then ! possibly new link !!$ ib=1 !!$ do while (ib.le.kb.and.(wp1.ne.w1(ib).or.wp2.ne.w2(ib))) !!$ ib=ib+1 !!$ end do !!$ if(ib.eq.kb+1) then !!$ kb=ib !!$ w1(kb)=wp1 !!$ w2(kb)=wp2 !!$ end if !!$ end if !!$120 end do !!$ wb=kb !!$ write(*,*)'second network file', wb,'links' !!$ !!!!!!!!!!!!!ELIMINATION!!!!!!!!!!!!!!!!!!!!!!!!!!! ALLOCATE(PW(1200000,LM),ACT(1200000),CY(LM,CYNM,LM),CYN(LM)) EL=1 IF(MODE.EQ.1) THEN !! ELIMINATING CYCLIC LOOPS VIA COUNTING ALL CYCLES k1=0 NCYC=1 OPEN(UNIT=25,FILE='backlinks_cycle.txt') DO WHILE (NCYC.GE.1) mul=0 ncyc=0 !!!!!!!!!!!!!!!!! search for all cycles!!!!!!!!!!!!!!! CYN=0 nwmax=0 lvmax=0 DO KB=1,NB IF(EL(KB).EQ.1) THEN P1=I1(KB) P2=I2(KB) NW=1 ! NUMBER OF WAY LV=1 ! LEVEL PW(NW,LV)=P2 ! PA(NUMBER OF WAY, NUMBER OF LEVEL) ACT(1)=1 ! ACT(NUMBER OF WAY) =1 -- ACTIVE =0 -DEAD NAW=NW ! NUMBER OF ACTIVE WAY DO WHILE (NAW.GE.1) NWN=NW LVN=LV+1 ! new layer number DO IW=1,NW IF(ACT(IW).EQ.1) THEN ! ACTIVE WAY P11=PW(IW,LV) BR=0 ! BRANCHING NUMBER DO IP=1,NND(P11) PT=NEIGHD(P11,IP) IF(PT.EQ.P1) THEN ! CYCLE OF LV+1 IS FOUND ncyc=ncyc+1 CYN(LVN)=CYN(LVN)+1 ! NUMBER OF CYCLES OF LVN DO KV=1,LV CY(LVN,CYN(LVN),KV)=PW(IW,KV) END DO CY(LVN,CYN(LVN),LVN)=P1 MUL(KB)=MUL(KB)+1 ! UPDATING LINK MULTIPL. ELSE IF(PT.NE.P1) THEN ! NO CYCLE IV=1 ! CHECK IF THE PROTEIN IS ON THE PATHWAY DO WHILE (PT.NE.PW(IW,IV).AND.IV.LE.LV) IV=IV+1 END DO IF(IV.EQ.LVN) THEN ! ADDING BR=BR+1 IF(BR.EQ.1) THEN ! SAME WAY PW(IW,LVN)=PT ELSE IF(BR.GE.2) THEN ! NEW WAY NWN=NWN+1 ! ADDING A NEW INDEX NAW=NAW+1 DO KV=1,LV PW(NWN,KV)=PW(IW,KV) END DO PW(NWN,LVN)=PT ACT(NWN)=1 END IF END IF END IF END DO IF(BR.EQ.0) THEN NAW=NAW-1 IF(NWN.GT.NW) THEN DO KV=1,LVN PW(IW,KV)=PW(NWN,KV) END DO NWN=NWN-1 ELSE ACT(IW)=0 END IF END IF END IF END DO NW=NWN LV=LVN END DO END IF nwmax=max0(nwmax,nw) lvmax=max0(lvmax,lv) END DO !!!!!!!!!!!!!! CHECK!!!!!!!!!!!!!!! ! write(*,*)'nycles', ncyc,'path',nwmax,lvmax IF (NCYC.EQ.0) GO TO 125 ! END OF THE CYCLE ccc=0 do iv=1,LM ccc=ccc+cyn(iv)/iv if (cyn(iv).ge.1.AND.K1.EQ.0) write(*,*)iv,cyn(iv)/iv !only first round end do if(k1.eq.0)write(*,*)'number of cycles',ccc !!!!!!!!!! HERE WE HAVE A LIST OF LINKS WITH MULTIPLICITY MUL!!!!!!!!!!!!!!!! !!!! ORDERING IN DESCENDING MULTIPLICITY!!!!!! KEY=1 DO WHILE (KEY.GE.1) KEY=0 DO JB=1,NB-1 IF(MUL(JB).LT.MUL(JB+1)) THEN ! FLIP KEY=KEY+1 P1=MUL(JB) MUL(JB)=MUL(JB+1) MUL(JB+1)=P1 P1=EL(JB) EL(JB)=EL(JB+1) EL(JB+1)=P1 P1=I1(JB) P2=I2(JB) I1(JB)=I1(JB+1) I2(JB)=I2(JB+1) I1(JB+1)=P1 I2(JB+1)=P2 END IF END DO END DO KY=0 jb=0 k1=k1+1 ! number of eliminated links DO KB=1,NBEL if(mul(kb).ge.1) then write(*,*)'eliminated',kb EL(KB)=0 ! ELIMINATION WRITE(25,*)PROT(I1(KB)),PROT(I2(KB)) jb=jb+1 end if END DO NCYC=NCYC-jb NND=0 NNU=0 neighd=0 neighu=0 do kb=1,nb IF(EL(KB).EQ.1) THEN p1=i1(kb) p2=i2(kb) nnd(p1)=nnd(p1)+1 nnu(p2)=nnu(p2)+1 neighd(p1,nnd(p1))=p2 neighu(p2,nnu(p2))=p1 END IF end do 125 END DO ELSE IF(MODE.EQ.0) THEN !!!!!!!!!!!!!!!Reading the MC-ordered file FROM DIRECTED.F90 !!!!!!!!!!!!!! EL=1 open(unit=10,file='order.txt') ! ordered list from directed.90 DO ip=1,np read(10,*) p1,PD(IP) END DO close(10) !!!!!!!!!!!!!SORTING THE LINKS !!!!!!!!!!!!!!!!!!!!! k1=0 open(unit=10,file='backlinks.txt') ! ordered list from directed.90 DO KB=1,NB P1=I1(KB) P2=I2(KB) IF(PD(P1).GE.PD(P2)) then EL(KB)=0 write(10,*)prot(p1),prot(p2) K1=K1+1 END IF END DO END IF WRITE(*,*)'DELETED LINKS',K1 close(10) !!!!!!!! reflecting deleted links in NN statistics !!!!!!!!!!!!! NND=0 NNU=0 neighd=0 neighu=0 do kb=1,nb IF(EL(KB).EQ.1) THEN p1=i1(kb) p2=i2(kb) nnd(p1)=nnd(p1)+1 nnu(p2)=nnu(p2)+1 neighd(p1,nnd(p1))=p2 neighu(p2,nnu(p2))=p1 END IF end do !!!!!!!!!!!! ordering !!!!!!!!!!!!!!!!!!!!! L=0 ! LIS OF NODES IN A LEVEL LN=0 ! NUMBER OFNODES IN A LEVEL PL=0 ! NUMBER OF A LEVEL WITH A PROTEIN KC=0 ! CYCLIC LINKS OF THE LENGTH 5 AND MORE DO IP=1,NP IF(NND(IP).GE.1.AND.NNU(IP).EQ.0) THEN ! FIRST LAYER LN(1)=LN(1)+1 L(1,LN(1))=IP PL(IP)=1 END IF END DO J=1 DO while (J.le.LF.and.LN(J).GE.1) DO IP=1,LN(J) P1=L(J,IP) DO JP=1,NND(P1) P2=neighd(p1,JP) PL(P2)=J+1 END DO END DO LN=0 L=0 DO IP=1,NP JL=PL(IP) IF(JL.GE.1) THEN LN(JL)=LN(JL)+1 L(JL,LN(JL))=IP END IF END DO WRITE(*,*) DO JL=1,J+1 if(ln(jl).ge.1) WRITE(*,*)JL,LN(JL) END DO J=J+1 END DO !!!!!!!!!!!! Putting all end proteins in the lower layer. JL=1 do while(LN(JL).GE.1) JL=JL+1 END DO JLMAX=JL-1 ! LOWEST POPULATED LEVEL LN=0 L=0 DO IP=1,NP IF(NND(IP).EQ.0) PL(IP)=JLMAX JL=PL(IP) IF(JL.GE.1) THEN LN(JL)=LN(JL)+1 L(JL,LN(JL))=IP END IF END DO write(*,*) write(*,*)'With lowered leaves' DO JL=1,JLMAX WRITE(*,*)JL,LN(JL) END DO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!! Layer statistics -- STUDYING PROTEINS IN LAYERS !! AVERAGE LINK LENGTH NB0=0 do kb=1,nb p1=i1(kb) p2=i2(kb) IF(EL(KB).NE.0) THEN NB0=NB0+1 LB=LB+PL(P2)-PL(P1) END IF END DO WRITE(*,*) 'AVERAGE LINK LENGTH', 1.D0*LB/NB0, ' LAYERS' !!! OUTPUTTING PROTEINS BY LAYERS open(unit=10,file='layered_proteins.txt') JL=1 DO WHILE (LN(JL).GE.1) WRITE(10,*) JL write(10,*) DO IP=1,NP IF(PL(IP).EQ.JL) WRITE(10,*) PROT(IP) END DO write(10,*) write(10,*) JL=JL+1 END DO close(10) !!$!!!PAROLOGY OF LAYERS!!! !!$ open(unit=10,file='paralogs.dat') !!$ read(10,*)NG !!$! ALLOCATE(PG1(NG),PG2(NG),SCP(NG)) !!$ IG=0 !!$ ST=0 !!$ SL=0 !!$ DO K1=1,NG !!$ READ(10,*)P1,P2,SG !!$ IF(P1.EQ.P2) GO TO 12 !!$ KEY1=0 !!$ KEY2=0 !!$ DO IP=1,NP !!$ IF(PROT(IP).EQ.P1) KEY1=IP !!$ IF(PROT(IP).EQ.P2) KEY2=IP !!$ END DO !!$ IF(KEY1*KEY2.GT.0) THEN !!$ !! ST=ST+SG !!$ IF(NND(KEY1)*NND(KEY2).GT.0) ST=ST+SG !!$ IF(PL(KEY1).EQ.PL(KEY2)) THEN !!$ SL(PL(KEY1))=SL(PL(KEY1))+SG !!$ END IF !!$ END IF !!$12 END DO !!$ !!$ !!$ !!$!! ST=2.*ST/NP/(NP-1) !!$ ST=2.*ST/(NP-LN(JLMAX))/(NP-1-LN(JLMAX)) !!$ WRITE(*,*)'AVERAGE PAROLOGY FOR ALL',NP, 'PROTEINS',ST !!$ !!$ JL=1 !!$ WRITE(*,*)'layer#, # of proteins, layer paralogy' !!$ DO WHILE (LN(JL).GE.2) !!$ WRITE(*,*) !!$ SL(JL)=2.*SL(JL)/LN(JL)/(LN(JL)-1) !!$ WRITE(*,*)JL,LN(JL),SL(JL) !!$ JL=JL+1 !!$ END DO !!$ END PROGRAM DIRECTED SUBROUTINE trunk(nb,i1,i2,nrec,iwarm) ! FOR TRUNK RECONNECTION IMPLICIT NONE INTEGER NB,I1(NB),I2(NB),nrec,k1,k2,i,iwarm,temp,j,key double precision ran3 do i=1,nrec*nb k1=int(ran3(iwarm)*(nb+1)) k2=int(ran3(iwarm)*(nb+1)) if(k1*k2.eq.0.or.k1.eq.k2) go to 10 if(i1(k1).eq.i2(k2).or.i2(k1).eq.i1(k2)) go to 10 ! checking if we create a doublelink key=0 do j=1,nb if(i1(k1).eq.i1(j).and.i2(k2).eq.i2(j))key=1 if(i2(k1).eq.i2(j).and.i1(k2).eq.i1(j))key=1 end do if(key.eq.1) go to 10 temp=i1(k1) i1(k1)=i1(k2) i1(k2)=temp 10 continue end do ! check for the number of doublelinks END SUBROUTINE trunk ! ran3 from Numerical Recipes, 2nd edition !-------------------------------------------------------------------------- function ran3(idum) !========================================================================== ! implicit real*4(m) integer idum integer mbig,seed,mz ! real mbig,seed,mz double precision ran3,fac ! parameter (mbig=4000000.,mseed=1618033.,mz=0.,fac=2.5e-7) ! parameter (mbig=1000000000,mseed=161803398,mz=0,fac=1.e-9) parameter (mbig=1000000000,mseed=161803398,mz=0,fac=1./mbig) integer i,iff,ii,inext,inextp,k integer mj,mk,ma(55) ! real mj,mk,ma(55) save iff,inext,inextp,ma data iff /0/ 1 if(idum.lt.0.or.iff.eq.0)then iff=1 mj=mseed-iabs(idum) mj=mod(mj,mbig) ma(55)=mj mk=1 do 11 i=1,54 ii=mod(21*i,55) ma(ii)=mk mk=mj-mk if(mk.lt.mz)mk=mk+mbig mj=ma(ii) 11 continue do 13 k=1,4 do 12 i=1,55 ma(i)=ma(i)-ma(1+mod(i+30,55)) if(ma(i).lt.mz)ma(i)=ma(i)+mbig 12 continue 13 continue inext=0 inextp=31 idum=1 endif inext=inext+1 if(inext.eq.56)inext=1 inextp=inextp+1 if(inextp.eq.56)inextp=1 mj=ma(inext)-ma(inextp) if(mj.lt.mz)mj=mj+mbig ma(inext)=mj ran3=mj*fac if(ran3.le.0.or.ran3.ge.1) goto 1 !! if(ran3.le.0.or.ran3.ge.1) Then !! write(6,*)'RAN3 failed: ', ran3 ! stop !! endif return end