/*! \file mpigadgetio.cxx
 *  \brief this file contains routines used with MPI compilation and gadget io and domain construction.
 */

#ifdef USEMPI

//-- For MPI

#include "stf.h"
#include "gadgetitems.h"
#include "endianutils.h"

/// \name Gadget Domain decomposition
//@{

/*!
    Determine the domain decomposition.\n
    Here the domains are constructed in data units and only ThisTask==0 should call this routine.
    It is tricky to get appropriate load balancing and the correct number of particles per processor.\n

    I could use recursive binary splitting, as in a kd-tree, along the axis with the largest spread
    until there are as many subvolumes as processors.

    NOTE: I assume the particle data cannot be stored in memory, so position information would have to be
    read Nsplit times to determine the boundaries of the subvolumes. I could also randomly subsample the
    system and produce the tree from that, and should store for each processor the node structure generated
    by the domain decomposition. Alternatively, I could read the file twice, once to get the extent and once
    to calculate the entropy, then decompose along some primary axis, then choose an orthogonal axis, and
    keep iterating until I have the appropriate number of subvolumes, storing the boundaries of each
    subvolume. This means I don't store the data but still get at least a reasonable domain decomposition.

    NOTE: pkdgrav uses orthogonal recursive bisection along with a kd-tree; gadget-2 uses a Peano-Hilbert
    curve to map particles onto oct-trees. The question with either method is guaranteeing load balancing.
    For ORB this is achieved by splitting a (sub)volume along a dimension (say the one with the largest
    spread or maximum entropy) such that either side of the cut has approximately the same number of
    particles (ie: median splitting). But in both cases load balancing requires particle information, so
    I must load the system and then move particles about to ensure load balancing.

    The main thing is first to get the dimensional extent of the system. Then I can get an initial splitting
    just using the mid point between the boundaries along each dimension. Once I have that initial splitting,
    I just load the data and then start shifting it around.
*/
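// ---------------------------------------------------------------------------
// Illustrative sketch only (not called anywhere in this file): the recursive
// orthogonal-bisection / median-splitting idea described in the note above,
// written for the in-memory case. Unlike the histogram-based routines below,
// it assumes all positions fit on a single task, and the names used here
// (OrbDomain, ORBDecompose) are hypothetical, introduced purely for illustration.
#include <algorithm>
#include <array>
#include <cstddef>
#include <vector>

struct OrbDomain {
    double bnd[3][2]; ///< [dimension][min,max] extent of the subvolume
};

/// Recursively split the positions in [first,last) into 2^nlevels subvolumes,
/// cutting along the axis with the largest spread at the median so that each
/// side of every cut holds ~half the particles (i.e. load balanced by counts).
static void ORBDecompose(std::vector<std::array<double,3>> &pos,
                         std::size_t first, std::size_t last, int nlevels,
                         OrbDomain dom, std::vector<OrbDomain> &domains)
{
    if (nlevels == 0 || last - first < 2) { domains.push_back(dom); return; }
    // axis with the largest spread within this subvolume
    int axis = 0; double maxspread = -1.0;
    for (int d = 0; d < 3; d++) {
        double lo = pos[first][d], hi = pos[first][d];
        for (std::size_t i = first; i < last; i++) {
            lo = std::min(lo, pos[i][d]); hi = std::max(hi, pos[i][d]);
        }
        if (hi - lo > maxspread) { maxspread = hi - lo; axis = d; }
    }
    // median split: partially sort so the median element sits at mid
    std::size_t mid = first + (last - first) / 2;
    std::nth_element(pos.begin()+first, pos.begin()+mid, pos.begin()+last,
        [axis](const std::array<double,3> &a, const std::array<double,3> &b)
        { return a[axis] < b[axis]; });
    double cut = pos[mid][axis];
    OrbDomain left = dom, right = dom;
    left.bnd[axis][1] = cut; right.bnd[axis][0] = cut;
    ORBDecompose(pos, first, mid, nlevels-1, left, domains);
    ORBDecompose(pos, mid, last, nlevels-1, right, domains);
}
// ---------------------------------------------------------------------------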
void MPIDomainExtentGadget(Options &opt){
#define SKIP2 Fgad[i].read((char*)&dummy, sizeof(dummy));
    Int_t i,j,k,n,m,temp,pc,pc_new, Ntot;
    int dummy;
    FLOAT ctemp[3];
    char buf[200];
    char DATA[5];
    fstream *Fgad;
    struct gadget_header *header;

    if (ThisTask==0) {
        Fgad=new fstream[opt.num_files];
        header=new gadget_header[opt.num_files];
        for(i=0; i<opt.num_files; i++) {
            if(opt.num_files>1) sprintf(buf,"%s.%d",opt.fname,int(i));
            else sprintf(buf,"%s",opt.fname);
            Fgad[i].open(buf,ios::in);
            if(!Fgad[i]) {
                cout<<"can't open file "<<buf<<endl;
                exit(9);
            }
#ifdef GADGET2FORMAT
            SKIP2;
            Fgad[i].read((char*)&DATA[0],sizeof(char)*4);DATA[4] = '\0';
            SKIP2;
            SKIP2;
            fprintf(stderr,"reading %s",DATA);
#endif
            Fgad[i].read((char*)&dummy, sizeof(dummy));
            Fgad[i].read((char*)&header[i], sizeof(gadget_header));
            Fgad[i].read((char*)&dummy, sizeof(dummy));
            //endian indep call
            header[i].Endian();
            Fgad[i].close();
        }
        //for a cosmological (periodic) snapshot the extent is simply [0,BoxSize] in each dimension
        for (i=0;i<3;i++) {mpi_xlim[i][0]=0;mpi_xlim[i][1]=header[0].BoxSize;}
        /*
        //otherwise the extent could be determined from the particle positions themselves,
        //reading the position block of every file and keeping the running min/max
        for(i=0; i<opt.num_files; i++) {
            for(k=0;k<6;k++) for(n=0;n<header[i].npart[k];n++) {
                Fgad[i].read((char*)&ctemp[0], sizeof(FLOAT)*3);
                if (opt.partsearchtype==PSTALL)
                    for (m=0;m<3;m++) {if (ctemp[m]<mpi_xlim[m][0]) mpi_xlim[m][0]=ctemp[m]; if (ctemp[m]>mpi_xlim[m][1]) mpi_xlim[m][1]=ctemp[m];}
                else if (opt.partsearchtype==PSTDARK)
                    if (!(k==GASTYPE||k==STARTYPE)) for (m=0;m<3;m++) {if (ctemp[m]<mpi_xlim[m][0]) mpi_xlim[m][0]=ctemp[m]; if (ctemp[m]>mpi_xlim[m][1]) mpi_xlim[m][1]=ctemp[m];}
                else if (opt.partsearchtype==PSTSTAR)
                    if (k==STARTYPE) for (m=0;m<3;m++) {if (ctemp[m]<mpi_xlim[m][0]) mpi_xlim[m][0]=ctemp[m]; if (ctemp[m]>mpi_xlim[m][1]) mpi_xlim[m][1]=ctemp[m];}
                else if (opt.partsearchtype==PSTGAS)
                    if (k==GASTYPE) for (m=0;m<3;m++) {if (ctemp[m]<mpi_xlim[m][0]) mpi_xlim[m][0]=ctemp[m]; if (ctemp[m]>mpi_xlim[m][1]) mpi_xlim[m][1]=ctemp[m];}
            }
        }
        */
        //There may be issues with particles lying exactly on the edge of a domain, so previously the limits
        //were always expanded by a small amount; now this is only done if a specific compile option is passed
#ifdef MPIEXPANDLIM
        for (int j=0;j<3;j++) {
            Double_t dx=0.001*(mpi_xlim[j][1]-mpi_xlim[j][0]);
            mpi_xlim[j][0]-=dx;mpi_xlim[j][1]+=dx;
        }
#endif
    }

    //make sure limits have been found
    MPI_Barrier(MPI_COMM_WORLD);
    if (NProcs==1) {
        for (i=0;i<3;i++) {
            mpi_domain[ThisTask].bnd[i][0]=mpi_xlim[i][0];
            mpi_domain[ThisTask].bnd[i][1]=mpi_xlim[i][1];
        }
    }
}
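// ---------------------------------------------------------------------------
// Illustrative sketch only (not called by the routine below): the decomposition
// routine that follows splits the box NProcs=2^Nsplit ways, assigning the binary
// splits to the axes in order of decreasing spread and then placing slab
// boundaries so that each slab holds roughly the same number of particles, by
// inverting a 1-D count histogram along each axis. The helpers here
// (SplitsPerAxis, EqualCountBoundaries) are hypothetical names and use plain
// linear interpolation within a bin, whereas the routine below interpolates on
// a log scale; they are a sketch of the idea, not the routine itself.
#include <cmath>
#include <vector>

// distribute log2(nprocs) binary splits over the three axes, cycling through
// the axes in the preference order given by axisorder (most spread first)
static void SplitsPerAxis(int nprocs, const int axisorder[3], int nxsplit[3])
{
    int nsplit = (int)(std::log((double)nprocs)/std::log(2.0)+0.5);
    for (int j = 0; j < 3; j++) nxsplit[j] = 0;
    for (int j = 0, isplit = 0; j < nsplit; j++) {
        nxsplit[axisorder[isplit]]++;
        if (++isplit == 3) isplit = 0;
    }
    // a slab along axis a is subsequently cut into 2^nxsplit[a] pieces
}

// given a 1-D histogram of particle counts over [xmin,xmax], return the
// nslab-1 interior boundaries such that each slab holds ~ntot/nslab counts
static std::vector<double> EqualCountBoundaries(const std::vector<double> &histo,
                                                double xmin, double xmax, int nslab)
{
    std::vector<double> bnd;
    double ntot = 0; for (double h : histo) ntot += h;
    double dx = (xmax - xmin) / (double)histo.size();
    double target = ntot / (double)nslab, sum = 0;
    int k = 1;
    for (std::size_t i = 0; i < histo.size() && k < nslab; i++) {
        sum += histo[i];
        while (k < nslab && sum >= target*k) {
            // linearly interpolate within the bin that crosses the target count
            double frac = (histo[i] > 0) ? 1.0 - (sum - target*k)/histo[i] : 1.0;
            bnd.push_back(xmin + dx*((double)i + frac));
            k++;
        }
    }
    return bnd;
}
// ---------------------------------------------------------------------------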
///to update the decomposition based on gadget information
void MPIDomainDecompositionGadget(Options &opt){
#define SKIP2 Fgad[i].read((char*)&dummy, sizeof(dummy));
    Int_t i,j,k,n,m,temp,pc,pc_new, Ntot;
    int Nsplit,isplit;
    Int_t nbins1d,nbins3d, ibin[3];
    Double_t **histo1d;//for initial projection and estimate of domain decomposition
    //Double_t ***histo3d,**histo2d;//finer scale histo for more accurate decomposition
    Double_t *histo3d,*histo2d;//finer scale histo for more accurate decomposition
    Coordinate *histo3dbinval;
    Coordinate2D *histo2dbinval;
    int dummy;
    FLOAT ctemp[3];
    char buf[200];
    char DATA[5];
    fstream *Fgad;
    struct gadget_header *header;
    Double_t a;
    int b;

    if (ThisTask==0) {
        /*cout<<"domain decomposition"<<endl;*/
        Fgad=new fstream[opt.num_files];
        header=new gadget_header[opt.num_files];
        //first pass: read the headers and coarsely bin the particle distribution along each dimension
        for(i=0; i<opt.num_files; i++) {
            if(opt.num_files>1) sprintf(buf,"%s.%d",opt.fname,i);
            else sprintf(buf,"%s",opt.fname);
            Fgad[i].open(buf,ios::in);
            if(!Fgad[i]) {
                cout<<"can't open file "<<buf<<endl;
                exit(9);
            }
            //... (the header of the file is read as above, then the particle positions are read,
            //binned into the coarse 1-d histograms histo1d[j], and the variance var[j]
            //(or entropy e) of each coordinate is accumulated) ...
        }
        //rank the dimensions by spread using a priority queue
        PriorityQueue *pq=new PriorityQueue(3);
        for (j=0;j<3;j++) {
            pq->Push(j,1.0/var[j]);
            //pq->Push(j,e);
        }
        for (j=0;j<3;j++) {mpi_ideltax[j]=pq->TopQueue();pq->Pop();}
        delete pq;
        //determine the number of splits in each dimension
        Nsplit=log((float)NProcs)/log(2.0);
        isplit=0;
        for (j=0;j<3;j++) mpi_nxsplit[j]=0;
        for (j=0;j<Nsplit;j++) {
            mpi_nxsplit[mpi_ideltax[isplit++]]++;
            if (isplit==3) isplit=0;
        }
        //... using the 1-d histograms, place bin edges along each dimension such that each bin of the
        //finer 3-d (and projected 2-d) histograms holds roughly Ntot*nfac particles ...
        for (m=0;m<3;m++) {
            j=mpi_ideltax[m];
            Double_t sum=0., kbnd=1.;
            for (i=1;i<=nbins1d;i++) {
                sum+=histo1d[j][i];
                if (sum>=Ntot*nfac*kbnd) {
                    //no interpolation
                    //double value=mpi_xlim[j][0]+((mpi_xlim[j][1]-mpi_xlim[j][0])/(Double_t)nbins1d)*(i-0.5);
                    //linear interpolation on log scale
                    double value=mpi_xlim[j][0]+((mpi_xlim[j][1]-mpi_xlim[j][0])/(Double_t)nbins1d)*(i-1)
                        +((mpi_xlim[j][1]-mpi_xlim[j][0])/(Double_t)nbins1d)/(log(histo1d[j][i]))*(log(sum)-log(Ntot*nfac*kbnd));
                    histo3dbinval[(int)kbnd][m]=value;
                    if (m<2) histo2dbinval[(int)kbnd][m]=value;
                    kbnd+=1.0;
                }
            }
        }
        histo3d=new Double_t[nbins3d*nbins3d*nbins3d];
        histo2d=new Double_t[nbins3d*nbins3d];
        Int_t nbins3d3=nbins3d*nbins3d*nbins3d;
        Int_t nbins3d2=nbins3d*nbins3d;
        for (i=0;i<nbins3d3;i++) histo3d[i]=0.;
        for (i=0;i<nbins3d2;i++) histo2d[i]=0.;
        //second pass: reread the files and fill the finer 3-d (and projected 2-d) histograms
        for(i=0; i<opt.num_files; i++) {
            if(opt.num_files>1) sprintf(buf,"%s.%d",opt.fname,i);
            else sprintf(buf,"%s",opt.fname);
            Fgad[i].open(buf,ios::in);
#ifdef GADGET2FORMAT
            SKIP2;
            Fgad[i].read((char*)&DATA[0],sizeof(char)*4);DATA[4] = '\0';
            SKIP2;
            SKIP2;
            fprintf(stderr,"reading %s",DATA);
#endif
            Fgad[i].read((char*)&dummy, sizeof(dummy));
            Fgad[i].read((char*)&header[i], sizeof(gadget_header));
            Fgad[i].read((char*)&dummy, sizeof(dummy));
            //endian indep call
            header[i].Endian();
#ifdef GADGET2FORMAT
            SKIP2;
            Fgad[i].read((char*)&DATA[0],sizeof(char)*4);DATA[4] = '\0';
            SKIP2;
            SKIP2;
            fprintf(stderr,"reading %s",DATA);
#endif
            Fgad[i].read((char*)&dummy, sizeof(dummy));
            for (k=0;k<6;k++) {
                for (n=0;n<header[i].npart[k];n++) {
                    Fgad[i].read((char*)&ctemp[0], sizeof(FLOAT)*3);
                    //locate the bin of the particle in each (reordered) dimension
                    for (m=0;m<3;m++) for (j=0;j<nbins3d;j++)
                        if (ctemp[mpi_ideltax[m]]>=histo3dbinval[j][m]&&ctemp[mpi_ideltax[m]]<histo3dbinval[j+1][m]) {ibin[m]=j;break;}
                    //... (increment histo3d and histo2d at the cell given by ibin) ...
                }
            }
            Fgad[i].close();
        }
        //determine the domain boundaries, first along the dimension with the largest spread,
        //walking the histogram until the enclosed count reaches Ntot/mpi_nxsplit[ix]
        int ix=mpi_ideltax[0],iy=mpi_ideltax[1],iz=mpi_ideltax[2];
        Int_t start[3],end[3];
        Double_t binsum[3],lastbin,bndval[3];
        binsum[0]=0;
        for (Int_t n1=0;n1<nbins3d;n1++) {
            binsum[0]+=histo1d[0][n1];
            if (binsum[0]>=Ntot/(Double_t)mpi_nxsplit[ix]) {
                end[0]=n1;
                //bndval[0]=histo3dbinval[n1][0];
                //interpolate boundary value
                //bndval[0]=histo3dbinval[n1][0]+
                //  (Ntot/(Double_t)mpi_nxsplit[ix]-binsum[0])*(histo3dbinval[n1+1][0]-histo3dbinval[n1][0])/(histo1d[0][n1+1]);
                bndval[0]=histo3dbinval[n1][0]+
                    (log(Ntot/(Double_t)mpi_nxsplit[ix])-log(binsum[0]))*(histo3dbinval[n1+1][0]-histo3dbinval[n1][0])/(log(histo1d[0][n1+1]));
                break;
            }
        }
        //... the boundaries along the remaining two dimensions are found in the same fashion, walking the
        //projected 2-d histogram (target count Ntot/(mpi_nxsplit[ix]*mpi_nxsplit[iy])) and the full 3-d
        //histogram (target count Ntot/(mpi_nxsplit[ix]*mpi_nxsplit[iy]*mpi_nxsplit[iz])), keeping the edge
        //whose enclosed count binsum minus lastbin is closest to the target, and the resulting subvolume
        //boundaries are stored in mpi_domain ...
    }
}

///determine the number of particles in the local MPI domain using the gadget header and position information
void MPINumInDomainGadget(Options &opt)
{
    if (NProcs>1) {
        MPIDomainExtentGadget(opt);
        MPIInitialDomainDecomposition(opt);
        MPIDomainDecompositionGadget(opt);
        Int_t i,j,k,n,m,temp,pc,pc_new, Ntot,indark,ingas,instar;
        Int_t idval;
        Int_t ntot_withmasses;
        int dummy;
        double z,aadjust,Hubble;
        FLOAT ctemp[3], dtemp;
        char buf[200];
        char DATA[5];
        fstream *Fgad;
        struct gadget_header *header;
        Int_t Nlocalold=Nlocal;
        int *ireadtask,*readtaskID;
        ireadtask=new int[NProcs];
        readtaskID=new int[opt.nsnapread];
        std::vector<int> ireadfile(opt.num_files);
        MPIDistributeReadTasks(opt,ireadtask,readtaskID);
        MPI_Status status;
        Int_t Nlocalbuf,ibuf=0,*Nbuf, *Nbaryonbuf;
        Nbuf=new Int_t[NProcs];
        Nbaryonbuf=new Int_t[NProcs];
        for (int j=0;j<NProcs;j++) {Nbuf[j]=0;Nbaryonbuf[j]=0;}
        Fgad=new fstream[opt.num_files];
        header=new gadget_header[opt.num_files];
        if (ireadtask[ThisTask]>=0) {
            MPISetFilesRead(opt,ireadfile,ireadtask);
            for(i=0; i<opt.num_files; i++) if (ireadfile[i]) {
                if(opt.num_files>1) sprintf(buf,"%s.%d",opt.fname,int(i));
                else sprintf(buf,"%s",opt.fname);
                Fgad[i].open(buf,ios::in);
#ifdef GADGET2FORMAT
                SKIP2;
                Fgad[i].read((char*)&DATA[0],sizeof(char)*4);DATA[4] = '\0';
                SKIP2;
                SKIP2;
                fprintf(stderr,"reading... %s\n",DATA);
#endif
                Fgad[i].read((char*)&dummy, sizeof(dummy));
                Fgad[i].read((char*)&header[i], sizeof(gadget_header));
                Fgad[i].read((char*)&dummy, sizeof(dummy));
                //endian indep call
                header[i].Endian();
#ifdef GADGET2FORMAT
                Fgad[i].read((char*)&dummy, sizeof(dummy));
                Fgad[i].read((char*)&DATA[0],sizeof(char)*4);DATA[4] = '\0';
                Fgad[i].read((char*)&dummy, sizeof(dummy));
                Fgad[i].read((char*)&dummy, sizeof(dummy));
#endif
                Fgad[i].read((char*)&dummy, sizeof(dummy));
                for(k=0;k