/* last modified 7 Sep. 04 */ /* This QCDOC program is for SU(GROUP) lattice gauge fields. The lattice dimensions are in size[]. The number of lattice dimensions is arbitrary. The boundaries are periodic; each dimension should be even for the checkerboard updating used. HITS = metropolis hits per link monte() updates lattice with conventinal metropolis algorithm overrelax() does an overrelaxation algorithm; somewhat faster at decorrelating once the energy is near the right value */ /* some timings 8^4 SU(3) 1 node 2.83 sec/sweep, speedup=1 2 nodes 1.55 sec/sweep, speedup=1.82 4 nodes 0.84 sec/sweep, speedup=3.37 8 nodes 0.471 sec/sweep, speedup=6.01 16 nodes 0.266 sec/sweep, speedup=10.64 16^4 SU(3) (needs at least 8 nodes) 8 nodes 6.93 sec/sweep 16 nodes 3.77 sec/sweep 8^4 on 8 nodes: about 37% of time in getlinks() */ /* size and other parameters */ #define DIMENSION 4 static int size[DIMENSION]={8,8,8,8}; double beta=5.71; #define GROUP 3 #define HITS 10 /* DEBUG turns on more error checking in the communication routines */ #define DEBUG 1 /* maximum length message queue allowed */ #define QLEN 64 #define BLOCKSIZE (2*GROUP*GROUP*sizeof(double)) #include #include #include #include #include "include/qmemcpyoc.C" class matrix { public: double real[GROUP][GROUP]; double imag[GROUP][GROUP]; void conjugate(); void project(); /* projects onto the gauge group */ void determinant(double& rez, double& imz); void operator = (const double x); void printmatrix(); }; /* for the Metropolis table */ #define TABLELENGTH 64 matrix table[TABLELENGTH]; /* global variables */ int nsites; int nlinks; int vectorlength, maxvlength; /* work with groups of links together */ int oldsite=-1, oldlink=-1; /* to help makelinklist avoid duplication */ int minionnode,maxionnode; /* first and last+1 onnode link indices */ double rfactor=(1.0/RAND_MAX); /* for turning rand into double */ int flopcount; /* not all properly flops counted if GROUP>3 */ matrix zeromatrix; /* convenient for resetting matrices */ matrix tempmatrix; matrix *atemp, *btemp, *ctemp; double *sold=NULL, *snew=NULL; /* vector of old and trial actions */ double stot=0.0, eds=0.0, acc=0.0; /* global accumulators */ /* eds will accumulate exp(delta action) where delta action is the trial action minus the old action used in the metropolis algorithm. When in equilibrium eds should fluctuate about unity. */ int *sitelist; /* pointer to a block of onnode sites */ int *linklist; /* an array of links for fetching */ /* prototype things */ void prod(matrix* g1, matrix* g2, matrix* g3); void init(); void cleanup(); void monte(); void renorm(); void overrelax(); void staple(matrix *st,int site,int link); void metro(matrix *old,matrix *trial,double bias); void vgroup(matrix *g); void thirdrow(matrix* g); /* special for su(3) */ void vcopy(matrix *g1, matrix *g2); void getlinks(matrix *g,int site,int link); void getconjugate(matrix *g,int site,int link); void savelinks(matrix *g,int site,int link); void vtprod(matrix *g1,matrix *g2,double *s); void ranmat(matrix *g); void vprod(matrix *g1,matrix *g2,matrix *g3); void makelinklist(int site,int link); void newtable(); void site2coords(int site, int *x); int coords2site(int *x); int parity(int site); int shiftsite(int site,int link,int shift); int main() { int i,iter; double mytime; printf("clocks per sec = %ld\n",CLOCKS_PER_SEC); mycom.start(); init(); printf("lattice size:"); for (i=0;i>localbits) #define address(index) (datastart+(index&mask)) #define onnode(index) (processor==node(remaplink(index))) void * allocate(int n, int size){ /* allocate space for n things of given size divide evenly and round up to a power of two on each processor */ int pernode; void * ptr; pernode=n; /* amount per processor rounded up to a power of two */ pernode = (pernode + NumNodes() - 1) / NumNodes(); pernode--; /* change most significant bit if a power of two */ localbits = 1; while (pernode >>= 1) /* find most significant bit */ localbits++; pernode=(1<=0;j--){ t=div(x[j],size[j]>>1); result=result*(size[j]>>1)+t.rem; toppart=2*toppart+t.quot; } result+=(toppart*(nsites>>DIMENSION)); return result; } int remaplink(int i){ div_t t; t=div(i,DIMENSION); return remapsite[t.quot] * DIMENSION + t.rem; } inline void fetch(matrix* u, int index) { index=remaplink(index); mycom.qmemcpy(processor, u, node(index),address(index), sizeof(matrix)); } inline void store(matrix* u, int index) { index=remaplink(index); mycom.qmemcpy(node(index),address(index), processor, u, sizeof(matrix)); } void init() { int i,iv; flopcount=0; zeromatrix=0.; /* seed random number generator */ srand(1234*(UniqueID()+1)); #if DEBUG printf("first random number = %g\n",(rfactor*rand())); #endif /* calculate nsites, nlinks */ nsites=1; for (i=0;imaxvlength) die("too large a vector"); staple(ctemp,*sitelist,link); /* get neighborhood */ /* get old links and calculate action */ getlinks(atemp,*sitelist,link); vtprod(atemp,ctemp,sold); for (hit=0;hitmaxvlength) die("too large a vector"); staple(ctemp,*sitelist,link); /* get neighborhood */ /* get old link and calculate action */ getlinks(atemp,*sitelist,link); vtprod(atemp,ctemp,sold); /* find trial element and new action */ vcopy(ctemp,btemp); vgroup(btemp); for(iv=0;ivQLEN); fetch(g+iv,linklist[iv]); } mycom.finishcopy(); return; } void getconjugate(matrix *g,int site,int link) { /* gather conjugate links into vector g */ int iv; getlinks(g,site,link); for(iv=0;iv=TABLELENGTH) k=0; g[iv]=table[k]; if ((rfactor*rand())<.5) g[iv].conjugate(); k++; } flopcount+=vectorlength; return; } #if (GROUP==3) void thirdrow(matrix* g) { /* for su(3) construct third row from first two */ int i,j,k; for (i=0;i<3;i++) { j= (i==2) ? 0 : i+1; k= (i==0) ? 2 : i-1; g->real[2][i]= +g->real[0][j]*g->real[1][k] -g->imag[0][j]*g->imag[1][k] -g->real[1][j]*g->real[0][k] +g->imag[1][j]*g->imag[0][k]; g->imag[2][i]= -g->real[0][j]*g->imag[1][k] -g->imag[0][j]*g->real[1][k] +g->real[1][j]*g->imag[0][k] +g->imag[1][j]*g->real[0][k]; } flopcount+=14*GROUP; return; } #endif void prod(matrix* g1, matrix* g2, matrix* g3) { /* overloading * does not seem as fast as this function call */ int i,j,k; matrix * t; if ((g1==g3)||(g2==g3)) /* use temporary */ t=&tempmatrix; else t=g3; for( i=0;ireal[i][j]= +g1->real[i][0]*g2->real[0][j] -g1->imag[i][0]*g2->imag[0][j]; t->imag[i][j]= +g1->real[i][0]*g2->imag[0][j] +g1->imag[i][0]*g2->real[0][j]; /* further unwrapping gains nothing substantial */ for (k=1;kreal[i][j]+=g1->real[i][k]*g2->real[k][j] -g1->imag[i][k]*g2->imag[k][j]; t->imag[i][j]+= g1->real[i][k]*g2->imag[k][j] +g1->imag[i][k]*g2->real[k][j]; } } } flopcount+=GROUP*(GROUP-(GROUP==3))*(6+(GROUP-1)*8); # if (GROUP==3) thirdrow(t); #endif if (t!=g3) *g3=*t; return; } void vprod(matrix *g1,matrix *g2,matrix *g3) { int iv; for(iv=0;iv=nsites) site-=nsites; /* and less than nsites */ k=0; while (site) { t=div(site,size[k]); site=t.quot; x[k++]=t.rem; } while(k=0;k--) { site*=size[k]; site+=x[k]; } #if DEBUG if (site>=nsites) die("bad site index"); #endif return site; } void makelinklist(int site,int link) { /* construct linklist[] to be a vector of links for fetching if site is shifted from sitelist[0], make the corresponding shift for the links, taking into account boundary conditions */ int iv; while (site<0) site+=nsites; while (site>=nsites) site-=nsites; if ((site==oldsite)&&(link==oldlink)) { return; /* reuse old list */ } oldsite=site; oldlink=link; if (site==sitelist[0]){ for(iv=0;iv=size[i]) x[i]-=size[i]; /* periodic boundaries */ } linklist[iv]=link+DIMENSION*coords2site(x); } } return; } int shiftsite(int site,int link,int shift) { /* returns site shifted in direction of link by amount shift */ int x[DIMENSION]; site2coords(site,x); x[link]+=shift; while (x[link]<0) x[link]+=size[link]; while (x[link]>=size[link]) x[link]-=size[link]; return coords2site(x); } int parity(int site) { /* returns 1 if an odd site */ int k,result,x[DIMENSION]; site2coords(site,x); result=0; for (k=0;k3 /* don't need this for su2 or su3 */ void matrix::determinant(double& detr, double& deti) { /* subroutine to calculate determinant of matrix */ /* could perhaps improve later by doing group=2 and 3 by hand, but as of now those cases don't call this routine */ int i,j,k,im,km; double temp; matrix copy; copy=*this; im=GROUP-1; for (i=0;i