From 7b6e0e065ebda62952c1a979f97080d2ad7a72c6 Mon Sep 17 00:00:00 2001 From: Qianqian Fang Date: Sun, 10 Mar 2024 12:32:48 -0400 Subject: [PATCH] [optimize] cut hyperboloid gaussian register use from 15 to 3, #127,#214 --- src/mcx_core.cu | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/mcx_core.cu b/src/mcx_core.cu index 6d77e1c4..268b14a9 100644 --- a/src/mcx_core.cu +++ b/src/mcx_core.cu @@ -1396,44 +1396,43 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime* case (MCX_SRC_HYPERBOLOID_GAUSSIAN): { // hyperboloid gaussian beam, patch submitted by Gijs Buist (https://groups.google.com/g/mcx-users/c/wauKd1IbEJE/m/_7AQPgFYAAAJ) float sphi, cphi; - float phi = TWO_PI * rand_uniform01(t); - sincosf(phi, &sphi, &cphi); + float r = TWO_PI * rand_uniform01(t); + sincosf(r, &sphi, &cphi); - float r = sqrtf(0.5f * rand_next_scatlen(t)) * launchsrc->param1.x; + r = sqrtf(0.5f * rand_next_scatlen(t)) * launchsrc->param1.x; /** parameter to generate photon path from coordinates at focus (depends on focal distance and rayleigh range) */ - float tt = -launchsrc->param1.y / launchsrc->param1.z; - float l = rsqrtf(r * r + launchsrc->param1.z * launchsrc->param1.z); + rv->x = -launchsrc->param1.y / launchsrc->param1.z; + rv->y = rsqrtf(r * r + launchsrc->param1.z * launchsrc->param1.z); /** if beam direction is along +z or -z direction */ - float3 pd = float3(r * (cphi - tt * sphi), r * (sphi + tt * cphi), 0.f); // position displacement from srcpos - float3 v0 = float3(-r * sphi * l, r * cphi * l, launchsrc->param1.z * l); // photon dir. w.r.t the beam dir. v + *((float4*)p) = float4(r * (cphi - rv->x * sphi), r * (sphi + rv->x * cphi), 0.f, p->w); // position displacement from srcpos + *rv = float3(-r * sphi * rv->y, r * cphi * rv->y, launchsrc->param1.z * rv->y); // photon dir. w.r.t the beam dir. v /** if beam dir. is not +z or -z, compute photon position and direction after rotation */ if ( v->z > -1.f + EPS && v->z < 1.f - EPS ) { - float tmp0 = 1.f - v->z * v->z; - float tmp1 = rsqrtf(tmp0); - float ctheta = v->z; - float stheta = sqrtf(tmp0); - cphi = v->x * tmp1; - sphi = v->y * tmp1; + r = 1.f - v->z * v->z; + float stheta = sqrtf(r); + r = rsqrtf(r); + cphi = v->x * r; + sphi = v->y * r; /** photon position displacement after rotation */ - pd = float3(pd.x * cphi * ctheta - pd.y * sphi, pd.x * sphi * ctheta + pd.y * cphi, -pd.x * stheta); + *((float4*)p) = float4(p->x * cphi * v->z - p->y * sphi, p->x * sphi * v->z + p->y * cphi, -p->x * stheta, p->w); /** photon direction after rotation */ - *((float4*)v) = float4(v0.x * cphi * ctheta - v0.y * sphi + v0.z * cphi * stheta, - v0.x * sphi * ctheta + v0.y * cphi + v0.z * sphi * stheta, - -v0.x * stheta + v0.z * ctheta, + *((float4*)v) = float4(rv->x * cphi * v->z - rv->y * sphi + rv->z * cphi * stheta, + rv->x * sphi * v->z + rv->y * cphi + rv->z * sphi * stheta, + -rv->x * stheta + rv->z * v->z, v->nscat); GPUDEBUG(("new dir: %10.5e %10.5e %10.5e\n", v->x, v->y, v->z)); } else { - *((float4*)v) = float4(v0.x, v0.y, (v->z > 0.f) ? v0.z : -v0.z, v->nscat); + *((float4*)v) = float4(rv->x, rv->y, (v->z > 0.f) ? rv->z : -rv->z, v->nscat); GPUDEBUG(("new dir-z: %10.5e %10.5e %10.5e\n", v->x, v->y, v->z)); } /** compute final launch position and update medium label */ - *((float4*)p) = float4(p->x + pd.x, p->y + pd.y, p->z + pd.z, p->w); + *((float4*)p) = float4(p->x + launchsrc->pos.x, p->y + launchsrc->pos.y, p->z + launchsrc->pos.z, p->w); *idx1d = (int(floorf(p->z)) * gcfg->dimlen.y + int(floorf(p->y)) * gcfg->dimlen.x + int(floorf(p->x))); if (p->x < 0.f || p->y < 0.f || p->z < 0.f || p->x >= gcfg->maxidx.x || p->y >= gcfg->maxidx.y || p->z >= gcfg->maxidx.z) { @@ -1442,6 +1441,7 @@ __device__ inline int launchnewphoton(MCXpos* p, MCXdir* v, Stokes* s, MCXtime* *mediaid = media[*idx1d]; } + canfocus = 0; break; }