Skip to content

Commit

Permalink
Merge pull request #230 from tlunttil/faster_stateq
Browse files Browse the repository at this point in the history
Faster stateq
  • Loading branch information
allegroLeiden authored May 18, 2017
2 parents 5926a51 + c18af20 commit 081d95e
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 100 deletions.
15 changes: 1 addition & 14 deletions src/aux.c
Original file line number Diff line number Diff line change
Expand Up @@ -658,19 +658,6 @@ Eventually I hope readOrBuildGrid() will be unilaterally called within LIME; if
} /* otherwise leave it at NULL - we will not be using it. */
}

/*....................................................................*/
float
invSqrt(float x){
/* The magic Quake(TM) fast inverse square root algorithm */
/* Can _only_ be used on 32-bit machine architectures */
float xhalf = 0.5f*x;
int i = *(int*)&x;
i = 0x5f3759df - (i>>1);
x = *(float*)&i;
x = x*(1.5f - xhalf*x*x);
return x;
}

/*....................................................................*/
void checkGridDensities(configInfo *par, struct grid *gp){
/* This checks that none of the density samples is too small. */
Expand Down Expand Up @@ -891,7 +878,7 @@ While this is off however, other gsl_* etc calls will not exit if they encounter
double *halfFirstDs; // and included them in private() I guess.
mp=malloc(sizeof(gridPointData)*par->nSpecies);

#pragma omp for schedule(dynamic)
#pragma omp for
for(id=0;id<par->pIntensity;id++){
#pragma omp atomic
++nVerticesDone;
Expand Down
1 change: 0 additions & 1 deletion src/lime.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ void getVelocities(configInfo *, struct grid *);
void getVelocities_pregrid(configInfo *, struct grid *);
void gridPopsInit(configInfo*, molData*, struct grid*);
void input(inputPars*, image*);
float invSqrt(float);
void levelPops(molData*, configInfo*, struct grid*, int*, double*, double*, const int);
void lineBlend(molData*, configInfo*, struct blendInfo*);
void LTE(configInfo*, struct grid*, molData*);
Expand Down
142 changes: 57 additions & 85 deletions src/stateq.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,70 +12,81 @@
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_errno.h>

struct collPartMatrixType {
double *ctot;
gsl_matrix *colli;
};

/*....................................................................*/
void
getFixedMatrix(molData *md, int ispec, struct grid *gp, int id, struct collPartMatrixType **collPartMat){
getFixedMatrix(molData *md, int ispec, struct grid *gp, int id, gsl_matrix *colli, configInfo *par){
int ipart,k,l,ti;
(*collPartMat) = malloc(sizeof(**collPartMat)*md[ispec].npart);

/* Initialize matrix with zeros */
for(ipart=0;ipart<md[ispec].npart;ipart++){
if(md[ispec].nlev<=0){
if(!silent) bail_out("Matrix initialization error in stateq");
exit(1);
}

(*collPartMat)[ipart].colli = gsl_matrix_alloc(md[ispec].nlev+1,md[ispec].nlev+1);
(*collPartMat)[ipart].ctot = malloc(sizeof(double)*md[ispec].nlev);
for(k=0;k<md[ispec].nlev+1;k++){
for(l=0;l<md[ispec].nlev+1;l++){
gsl_matrix_set((*collPartMat)[ipart].colli, k, l, 0.0);
}
}
if(md[ispec].nlev<=0){
if(!silent) bail_out("Matrix initialization error in stateq");
exit(1);
}
gsl_matrix_set_zero(colli);

/* Populate matrix with collisional transitions */
for(ipart=0;ipart<md[ispec].npart;ipart++){
struct cpData part = md[ispec].part[ipart];
double *downrates = part.down;
int di = md[ispec].part[ipart].densityIndex;
if (di<0) continue;

for(ti=0;ti<part.ntrans;ti++){
int coeff_index = ti*part.ntemp + gp[id].mol[ispec].partner[ipart].t_binlow;
double down = downrates[coeff_index]\
+ gp[id].mol[ispec].partner[ipart].interp_coeff*(downrates[coeff_index+1]\
- downrates[coeff_index]);
double up = down*md[ispec].gstat[part.lcu[ti]]/md[ispec].gstat[part.lcl[ti]]\
*exp(-HCKB*(md[ispec].eterm[part.lcu[ti]]-md[ispec].eterm[part.lcl[ti]])/gp[id].t[0]);
gsl_matrix_set((*collPartMat)[ipart].colli, part.lcu[ti], part.lcl[ti], down);
gsl_matrix_set((*collPartMat)[ipart].colli, part.lcl[ti], part.lcu[ti], up);

gsl_matrix_set(colli, part.lcu[ti], part.lcl[ti], gsl_matrix_get(colli, part.lcu[ti], part.lcl[ti]) - down*gp[id].dens[di]);
gsl_matrix_set(colli, part.lcl[ti], part.lcu[ti], gsl_matrix_get(colli, part.lcl[ti], part.lcu[ti]) - up*gp[id].dens[di]);
}

}

/* Does this work with >1 coll. part? */
double *ctot = malloc(sizeof(double)*md[ispec].nlev);
for(k=0;k<md[ispec].nlev;k++){
ctot[k]=0.0;
for(l=0;l<md[ispec].nlev;l++)
ctot[k] += gsl_matrix_get(colli,k,l);
gsl_matrix_set(colli,k,k,gsl_matrix_get(colli,k,k) - ctot[k]);
}
free(ctot);

/* For some reason collision matrix as constructed above is transposed.
Someone who is not this lazy could fix the loops instead of using gsl_matrix_transpose. */
gsl_matrix_transpose(colli);

double *girtot = malloc(sizeof(double)*md[ispec].nlev);
if(par->girdatfile!=NULL){
for(k=0;k<md[ispec].nlev;k++){
(*collPartMat)[ipart].ctot[k]=0.0;
girtot[k] = 0;
for(l=0;l<md[ispec].nlev;l++)
(*collPartMat)[ipart].ctot[k] += gsl_matrix_get((*collPartMat)[ipart].colli,k,l);
girtot[k] += md[ispec].gir[k*md[ispec].nlev+l];
}
for(k=0;k<md[ispec].nlev;k++){
gsl_matrix_set(colli,k,k,gsl_matrix_get(colli,k,k)+girtot[k]);
for(l=0;l<md[ispec].nlev;l++){
if(k!=l){
if(par->girdatfile!=NULL)
gsl_matrix_set(colli,k,l,gsl_matrix_get(colli,k,l)-md[ispec].gir[l*md[ispec].nlev+k]);
}
}
}
}
free(girtot);
}

/*....................................................................*/
void
getMatrix(int id, gsl_matrix *matrix, molData *md, struct grid *gp, int ispec, gridPointData *mp, struct collPartMatrixType *collPartMat, configInfo *par){
int k,l,li,ipart,di;
double *girtot;
getMatrix(gsl_matrix *matrix, molData *md, int ispec, gridPointData *mp, gsl_matrix *colli){
int k,l,li;

girtot = malloc(sizeof(double)*md[ispec].nlev);

/* Initialize matrix with zeros */
for(k=0;k<md[ispec].nlev+1;k++){
for(l=0;l<md[ispec].nlev+1;l++){
gsl_matrix_set(matrix, k, l, 0.);
}
}
/* Initialize matrix by copying the fixed part */
gsl_matrix_memcpy(matrix, colli);

/* Populate matrix with radiative transitions */
for(li=0;li<md[ispec].nline;li++){
Expand All @@ -86,37 +97,6 @@ getMatrix(int id, gsl_matrix *matrix, molData *md, struct grid *gp, int ispec, g
gsl_matrix_set(matrix, k, l, gsl_matrix_get(matrix, k, l)-md[ispec].beinstl[li]*mp[ispec].jbar[li]);
gsl_matrix_set(matrix, l, k, gsl_matrix_get(matrix, l, k)-md[ispec].beinstu[li]*mp[ispec].jbar[li]-md[ispec].aeinst[li]);
}

if(par->girdatfile!=NULL){
for(k=0;k<md[ispec].nlev;k++){
girtot[k] = 0;
for(l=0;l<md[ispec].nlev;l++)
girtot[k] += md[ispec].gir[k*md[ispec].nlev+l];
}
}

for(k=0;k<md[ispec].nlev;k++){
for(ipart=0;ipart<md[ispec].npart;ipart++){
di = md[ispec].part[ipart].densityIndex;
if(di>=0)
gsl_matrix_set(matrix,k,k,gsl_matrix_get(matrix,k,k)+gp[id].dens[di]*collPartMat[ipart].ctot[k]);
}
if(par->girdatfile!=NULL)
gsl_matrix_set(matrix,k,k,gsl_matrix_get(matrix,k,k)+girtot[k]);
for(l=0;l<md[ispec].nlev;l++){
if(k!=l){
for(ipart=0;ipart<md[ispec].npart;ipart++){
di = md[ispec].part[ipart].densityIndex;
if(di>=0)
gsl_matrix_set(matrix,k,l,gsl_matrix_get(matrix,k,l)-gp[id].dens[di]*gsl_matrix_get(collPartMat[ipart].colli,l,k));
}
if(par->girdatfile!=NULL)
gsl_matrix_set(matrix,k,l,gsl_matrix_get(matrix,k,l)-md[ispec].gir[l*md[ispec].nlev+k]);
}
}
gsl_matrix_set(matrix, md[ispec].nlev, k, 1.);
gsl_matrix_set(matrix, k, md[ispec].nlev, 0.);
}
}

/*....................................................................*/
Expand All @@ -125,14 +105,13 @@ stateq(int id, struct grid *gp, molData *md, const int ispec, configInfo *par\
, struct blendInfo blends, int nextMolWithBlend, gridPointData *mp\
, double *halfFirstDs, _Bool *luWarningGiven){

int t,s,iter,status,ipart;
int t,s,iter,status;
double *opop,*oopop,*tempNewPop=NULL;
double diff;
char errStr[80];
struct collPartMatrixType *collPartMat=NULL;

gsl_matrix *matrix = gsl_matrix_alloc(md[ispec].nlev+1, md[ispec].nlev+1);
gsl_matrix *reduc = gsl_matrix_alloc(md[ispec].nlev, md[ispec].nlev);
gsl_matrix *colli = gsl_matrix_alloc(md[ispec].nlev, md[ispec].nlev);
gsl_matrix *matrix = gsl_matrix_alloc(md[ispec].nlev, md[ispec].nlev);
gsl_vector *newpop = gsl_vector_alloc(md[ispec].nlev);
gsl_vector *rhVec = gsl_vector_alloc(md[ispec].nlev);
gsl_permutation *p = gsl_permutation_alloc (md[ispec].nlev);
Expand All @@ -150,20 +129,19 @@ stateq(int id, struct grid *gp, molData *md, const int ispec, configInfo *par\
diff=1;
iter=0;

getFixedMatrix(md,ispec,gp,id,&collPartMat);
getFixedMatrix(md,ispec,gp,id,colli,par);

while((diff>TOL && iter<MAXITER) || iter<5){
getjbar(id,md,gp,ispec,par,blends,nextMolWithBlend,mp,halfFirstDs);

getMatrix(id,matrix,md,gp,ispec,mp,collPartMat,par);
getMatrix(matrix,md,ispec,mp,colli);

/* this could also be done in getFixedMatrix */
for(s=0;s<md[ispec].nlev;s++){
for(t=0;t<md[ispec].nlev-1;t++){
gsl_matrix_set(reduc,t,s,gsl_matrix_get(matrix,t,s));
}
gsl_matrix_set(reduc,md[ispec].nlev-1,s,1.);
gsl_matrix_set(matrix,md[ispec].nlev-1,s,1.);
}

status = gsl_linalg_LU_decomp(reduc,p,&s);
status = gsl_linalg_LU_decomp(matrix,p,&s);
if(status){
if(!silent){
sprintf(errStr, "LU decomposition failed for point %d, iteration %d (GSL error %d).", id, iter, status);
Expand All @@ -172,7 +150,7 @@ stateq(int id, struct grid *gp, molData *md, const int ispec, configInfo *par\
exit(1);
}

status = gsl_linalg_LU_solve(reduc,p,rhVec,newpop);
status = gsl_linalg_LU_solve(matrix,p,rhVec,newpop);
if(status){
if(!silent && !(*luWarningGiven)){
*luWarningGiven = 1;
Expand All @@ -191,7 +169,7 @@ stateq(int id, struct grid *gp, molData *md, const int ispec, configInfo *par\
oopop[t]=opop[t];
opop[t]=gp[id].mol[ispec].pops[t];

#pragma omp critical
/* Removed pragma omp critical; no two threads have the same cell id. */
{
gp[id].mol[ispec].pops[t]=gsl_vector_get(newpop,t);
}
Expand All @@ -204,14 +182,8 @@ stateq(int id, struct grid *gp, molData *md, const int ispec, configInfo *par\
iter++;
}

for(ipart=0;ipart<md[ispec].npart;ipart++){
gsl_matrix_free(collPartMat[ipart].colli);
free(collPartMat[ipart].ctot);
}
free(collPartMat);

gsl_matrix_free(colli);
gsl_matrix_free(matrix);
gsl_matrix_free(reduc);
gsl_vector_free(rhVec);
gsl_vector_free(newpop);
gsl_permutation_free(p);
Expand Down

0 comments on commit 081d95e

Please sign in to comment.