diff options
-rw-r--r-- | chroma/cuda/daq.cu | 58 | ||||
-rw-r--r-- | chroma/cuda/photon.h | 45 | ||||
-rw-r--r-- | chroma/cuda/propagate.cu | 20 | ||||
-rw-r--r-- | chroma/event.py | 15 | ||||
-rw-r--r-- | chroma/gpu/daq.py | 4 | ||||
-rw-r--r-- | chroma/gpu/pdf.py | 3 | ||||
-rw-r--r-- | chroma/gpu/photon.py | 24 | ||||
-rw-r--r-- | chroma/sim.py | 14 |
8 files changed, 132 insertions, 51 deletions
diff --git a/chroma/cuda/daq.cu b/chroma/cuda/daq.cu index 3ed402d..719d928 100644 --- a/chroma/cuda/daq.cu +++ b/chroma/cuda/daq.cu @@ -36,6 +36,7 @@ __global__ void run_daq(curandState *s, unsigned int detection_state, int first_photon, int nphotons, float *photon_times, unsigned int *photon_histories, int *last_hit_triangles, + float *weights, int *solid_map, Detector *detector, unsigned int *earliest_time_int, @@ -55,22 +56,27 @@ run_daq(curandState *s, unsigned int detection_state, int channel_index = detector->solid_id_to_channel_index[solid_id]; if (channel_index >= 0 && (history & detection_state)) { - float time = photon_times[photon_id] + - sample_cdf(&rng, detector->time_cdf_len, - detector->time_cdf_x, detector->time_cdf_y); - unsigned int time_int = float_to_sortable_int(time); + + float weight = weights[photon_id]; + if (curand_uniform(&rng) < weight) { + float time = photon_times[photon_id] + + sample_cdf(&rng, detector->time_cdf_len, + detector->time_cdf_x, detector->time_cdf_y); + unsigned int time_int = float_to_sortable_int(time); - float charge = sample_cdf(&rng, detector->charge_cdf_len, + float charge = sample_cdf(&rng, detector->charge_cdf_len, detector->charge_cdf_x, detector->charge_cdf_y); - unsigned int charge_int = roundf(charge / detector->charge_unit); + unsigned int charge_int = roundf(charge / detector->charge_unit); - atomicMin(earliest_time_int + channel_index, time_int); - atomicAdd(channel_q_int + channel_index, charge_int); - atomicOr(channel_histories + channel_index, history); - } + atomicMin(earliest_time_int + channel_index, time_int); + atomicAdd(channel_q_int + channel_index, charge_int); + atomicOr(channel_histories + channel_index, history); + } // if weighted photon contributes - } + } // if photon detected by a channel + + } // if photon terminated on surface s[id] = rng; @@ -82,6 +88,7 @@ __global__ void run_daq_many(curandState *s, unsigned int detection_state, int first_photon, int nphotons, float *photon_times, unsigned int *photon_histories, int *last_hit_triangles, + float *weights, int *solid_map, Detector *detector, unsigned int *earliest_time_int, @@ -94,7 +101,7 @@ run_daq_many(curandState *s, unsigned int detection_state, __shared__ int channel_index; __shared__ unsigned int history; __shared__ float photon_time; - + __shared__ float weight; if (threadIdx.x == 0) { photon_id = first_photon + blockIdx.x; @@ -105,6 +112,7 @@ run_daq_many(curandState *s, unsigned int detection_state, history = photon_histories[photon_id]; channel_index = detector->solid_id_to_channel_index[solid_id]; photon_time = photon_times[photon_id]; + weight = weights[photon_id]; } } @@ -119,19 +127,21 @@ run_daq_many(curandState *s, unsigned int detection_state, for (int i = threadIdx.x; i < ndaq; i += blockDim.x) { int channel_offset = channel_index + i * channel_stride; - float time = photon_time + curand_normal(&rng) * 1.2f;// + - //sample_cdf(&rng, detector->time_cdf_len, - // detector->time_cdf_x, detector->time_cdf_y); - unsigned int time_int = float_to_sortable_int(time); - - float charge = 1.0f; //sample_cdf(&rng, detector->charge_cdf_len, - //detector->charge_cdf_x, - //detector->charge_cdf_y); - unsigned int charge_int = roundf(charge / detector->charge_unit); + if (curand_uniform(&rng) < weight) { + float time = photon_time + curand_normal(&rng) * 1.2f;// + + //sample_cdf(&rng, detector->time_cdf_len, + // detector->time_cdf_x, detector->time_cdf_y); + unsigned int time_int = float_to_sortable_int(time); - atomicMin(earliest_time_int + channel_offset, time_int); - atomicAdd(channel_q_int + channel_offset, charge_int); - atomicOr(channel_histories + channel_offset, history); + float charge = 1.0f; //sample_cdf(&rng, detector->charge_cdf_len, + //detector->charge_cdf_x, + //detector->charge_cdf_y); + unsigned int charge_int = roundf(charge / detector->charge_unit); + + atomicMin(earliest_time_int + channel_offset, time_int); + atomicAdd(channel_q_int + channel_offset, charge_int); + atomicOr(channel_histories + channel_offset, history); + } } s[id] = rng; diff --git a/chroma/cuda/photon.h b/chroma/cuda/photon.h index 8b7e588..79a87f0 100644 --- a/chroma/cuda/photon.h +++ b/chroma/cuda/photon.h @@ -9,6 +9,8 @@ #include "mesh.h" #include "geometry.h" +#define WEIGHT_LOWER_THRESHOLD 0.01f + struct Photon { float3 position; @@ -16,7 +18,9 @@ struct Photon float3 polarization; float wavelength; float time; - + + float weight; + unsigned int history; int last_hit_triangle; @@ -175,11 +179,17 @@ rayleigh_scatter(Photon &p, curandState &rng) p.polarization /= norm(p.polarization); } // scatter -__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) +__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng, + bool use_weights=false) { float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); + if (use_weights && p.weight > WEIGHT_LOWER_THRESHOLD) // Prevent absorption + absorption_distance = 1e30; + else + use_weights = false; + if (absorption_distance <= scattering_distance) { if (absorption_distance <= s.distance_to_boundary) { p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); @@ -193,6 +203,11 @@ __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) } else { if (scattering_distance <= s.distance_to_boundary) { + + // Scale weight by absorption probability along this distance + if (use_weights) + p.weight *= expf(-scattering_distance/s.absorption_length); + p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1); p.position += scattering_distance*p.direction; @@ -206,6 +221,10 @@ __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) } // photon is scattered in material1 } // if scattering_distance < absorption_distance + // Scale weight by absorption probability along this distance + if (use_weights) + p.weight *= expf(-s.distance_to_boundary/s.absorption_length); + p.position += s.distance_to_boundary*p.direction; p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1); @@ -269,7 +288,8 @@ propagate_at_boundary(Photon &p, State &s, curandState &rng) } // propagate_at_boundary __device__ int -propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry) +propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry, + bool use_weights=false) { Surface *surface = geometry->surfaces[s.surface_index]; @@ -283,6 +303,25 @@ propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry) float uniform_sample = curand_uniform(&rng); + if (use_weights && p.weight > WEIGHT_LOWER_THRESHOLD + && absorb < (1.0f - WEIGHT_LOWER_THRESHOLD)) { + // Prevent absorption and reweight accordingly + float survive = 1.0f - absorb; + absorb = 0.0f; + p.weight *= survive; + + // Renormalize remaining probabilities + detect /= survive; + reflect_diffuse /= survive; + reflect_specular /= survive; + } + + if (use_weights && detect > 0.0f) { + p.history |= SURFACE_DETECT; + p.weight *= detect; + return BREAK; + } + if (uniform_sample < absorb) { p.history |= SURFACE_ABSORB; return BREAK; diff --git a/chroma/cuda/propagate.cu b/chroma/cuda/propagate.cu index 87ba9f5..29f8aea 100644 --- a/chroma/cuda/propagate.cu +++ b/chroma/cuda/propagate.cu @@ -14,7 +14,7 @@ photon_duplicate(int first_photon, int nthreads, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, unsigned int *histories, - int *last_hit_triangles, + int *last_hit_triangles, float *weights, int copies, int stride) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -32,6 +32,7 @@ photon_duplicate(int first_photon, int nthreads, p.time = times[photon_id]; p.last_hit_triangle = last_hit_triangles[photon_id]; p.history = histories[photon_id]; + p.weight = weights[photon_id]; for (int i=1; i <= copies; i++) { int target_photon_id = photon_id + stride * i; @@ -43,6 +44,7 @@ photon_duplicate(int first_photon, int nthreads, times[target_photon_id] = p.time; last_hit_triangles[target_photon_id] = p.last_hit_triangle; histories[target_photon_id] = p.history; + weights[target_photon_id] = p.weight; } } @@ -79,11 +81,11 @@ copy_photons(int first_photon, int nthreads, unsigned int target_flag, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, unsigned int *histories, - int *last_hit_triangles, + int *last_hit_triangles, float *weights, float3 *new_positions, float3 *new_directions, float *new_wavelengths, float3 *new_polarizations, float *new_times, unsigned int *new_histories, - int *new_last_hit_triangles) + int *new_last_hit_triangles, float *new_weights) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -102,6 +104,7 @@ copy_photons(int first_photon, int nthreads, unsigned int target_flag, new_times[offset] = times[photon_id]; new_histories[offset] = histories[photon_id]; new_last_hit_triangles[offset] = last_hit_triangles[photon_id]; + new_weights[offset] = weights[photon_id]; } } @@ -112,7 +115,8 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, unsigned int *histories, - int *last_hit_triangles, int max_steps, + int *last_hit_triangles, float *weights, + int max_steps, int use_weights, Geometry *g) { __shared__ Geometry sg; @@ -143,6 +147,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, p.time = times[photon_id]; p.last_hit_triangle = last_hit_triangles[photon_id]; p.history = histories[photon_id]; + p.weight = weights[photon_id]; if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) return; @@ -166,7 +171,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, if (p.last_hit_triangle == -1) break; - command = propagate_to_boundary(p, s, rng); + command = propagate_to_boundary(p, s, rng, use_weights); if (command == BREAK) break; @@ -175,7 +180,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, continue; if (s.surface_index != -1) { - command = propagate_at_surface(p, s, rng, g); + command = propagate_at_surface(p, s, rng, g, use_weights); if (command == BREAK) break; @@ -196,7 +201,8 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, times[photon_id] = p.time; histories[photon_id] = p.history; last_hit_triangles[photon_id] = p.last_hit_triangle; - + weights[photon_id] = p.weight; + // Not done, put photon in output queue if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) { int out_idx = atomicAdd(output_queue, 1); diff --git a/chroma/event.py b/chroma/event.py index 334df9a..e65c543 100644 --- a/chroma/event.py +++ b/chroma/event.py @@ -42,7 +42,7 @@ class Vertex(object): self.t0 = t0 class Photons(object): - def __init__(self, pos, dir, pol, wavelengths, t=None, last_hit_triangles=None, flags=None): + def __init__(self, pos, dir, pol, wavelengths, t=None, last_hit_triangles=None, flags=None, weights=None): '''Create a new list of n photons. pos: numpy.ndarray(dtype=numpy.float32, shape=(n,3)) @@ -67,6 +67,10 @@ class Photons(object): flags: numpy.ndarray(dtype=numpy.uint32, shape=n) Bit-field indicating the physics interaction history of the photon. See history bit constants in chroma.event for definition. + + weights: numpy.ndarray(dtype=numpy.float32, shape=n) + Survival probability for each photon. Used by + photon propagation code when computing likelihood functions. ''' self.pos = pos self.dir = dir @@ -89,6 +93,11 @@ class Photons(object): else: self.flags = flags + if weights is None: + self.weights = np.ones(len(pos), dtype=np.float32) + else: + self.weights = weights + def __add__(self, other): '''Concatenate two Photons objects into one list of photons. @@ -104,7 +113,9 @@ class Photons(object): t = np.concatenate((self.t, other.t)) last_hit_triangles = np.concatenate((self.last_hit_triangles, other.last_hit_triangles)) flags = np.concatenate((self.flags, other.flags)) - return Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags) + weights = np.concatenate((self.weights, other.weights)) + return Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags, + weights) def __len__(self): '''Returns the number of photons in self.''' diff --git a/chroma/gpu/daq.py b/chroma/gpu/daq.py index ec5add4..10b4b00 100644 --- a/chroma/gpu/daq.py +++ b/chroma/gpu/daq.py @@ -67,7 +67,7 @@ class GPUDaq(object): chunk_iterator(nphotons, nthreads_per_block, max_blocks): self.gpu_funcs.run_daq(rng_states, np.uint32(0x1 << 2), np.int32(start_photon+first_photon), np.int32(photons_this_round), gpuphotons.t, - gpuphotons.flags, gpuphotons.last_hit_triangles, + gpuphotons.flags, gpuphotons.last_hit_triangles, gpuphotons.weights, self.solid_id_map_gpu, self.detector_gpu, self.earliest_time_int_gpu, @@ -78,7 +78,7 @@ class GPUDaq(object): chunk_iterator(nphotons, 1, max_blocks): self.gpu_funcs.run_daq_many(rng_states, np.uint32(0x1 << 2), np.int32(start_photon), np.int32(photons_this_round), gpuphotons.t, - gpuphotons.flags, gpuphotons.last_hit_triangles, + gpuphotons.flags, gpuphotons.last_hit_triangles, gpuphotons.weights, self.solid_id_map_gpu, self.detector_gpu, self.earliest_time_int_gpu, diff --git a/chroma/gpu/pdf.py b/chroma/gpu/pdf.py index 5d27f63..fe718a7 100644 --- a/chroma/gpu/pdf.py +++ b/chroma/gpu/pdf.py @@ -313,6 +313,9 @@ class GPUPDF(object): self.work_queues, block=(nthreads_per_block,1,1), grid=(self.event_hit_gpu.size//nthreads_per_block+1,1)) + cuda.Context.get_current().synchronize() + return # FIXME + self.gpu_funcs.accumulate_nearest_neighbor_block(np.int32(self.event_nhit), np.int32(gpuchannels.ndaq), self.map_hit_offset_to_channel_id_gpu, diff --git a/chroma/gpu/photon.py b/chroma/gpu/photon.py index f55b8e8..80fe77c 100644 --- a/chroma/gpu/photon.py +++ b/chroma/gpu/photon.py @@ -34,6 +34,7 @@ class GPUPhotons(object): self.t = ga.empty(shape=nphotons*ncopies, dtype=np.float32) self.last_hit_triangles = ga.empty(shape=nphotons*ncopies, dtype=np.int32) self.flags = ga.empty(shape=nphotons*ncopies, dtype=np.uint32) + self.weights = ga.empty(shape=nphotons*ncopies, dtype=np.float32) # Assign the provided photons to the beginning (possibly # the entire array if ncopies is 1 @@ -44,6 +45,7 @@ class GPUPhotons(object): self.t[:nphotons].set(photons.t.astype(np.float32)) self.last_hit_triangles[:nphotons].set(photons.last_hit_triangles.astype(np.int32)) self.flags[:nphotons].set(photons.flags.astype(np.uint32)) + self.weights[:nphotons].set(photons.weights.astype(np.float32)) module = get_cu_module('propagate.cu', options=cuda_options) self.gpu_funcs = GPUFuncs(module) @@ -56,7 +58,7 @@ class GPUPhotons(object): chunk_iterator(nphotons, nthreads_per_block, max_blocks): self.gpu_funcs.photon_duplicate(np.int32(first_photon), np.int32(photons_this_round), self.pos, self.dir, self.wavelengths, self.pol, self.t, - self.flags, self.last_hit_triangles, + self.flags, self.last_hit_triangles, self.weights, np.int32(ncopies-1), np.int32(nphotons), block=(nthreads_per_block,1,1), grid=(blocks, 1)) @@ -74,7 +76,8 @@ class GPUPhotons(object): t = self.t.get() last_hit_triangles = self.last_hit_triangles.get() flags = self.flags.get() - return event.Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags) + weights = self.weights.get() + return event.Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags, weights) def iterate_copies(self): '''Returns an iterator that yields GPUPhotonsSlice objects @@ -87,11 +90,12 @@ class GPUPhotons(object): wavelengths=self.wavelengths[window], t=self.t[window], last_hit_triangles=self.last_hit_triangles[window], - flags=self.flags[window]) + flags=self.flags[window], + weights=self.weights[window]) @profile_if_possible def propagate(self, gpu_geometry, rng_states, nthreads_per_block=64, - max_blocks=1024, max_steps=10): + max_blocks=1024, max_steps=10, use_weights=False): """Propagate photons on GPU to termination or max_steps, whichever comes first. @@ -123,7 +127,7 @@ class GPUPhotons(object): for first_photon, photons_this_round, blocks in \ chunk_iterator(nphotons, nthreads_per_block, max_blocks): - self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, np.int32(nsteps), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1)) + self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, self.weights, np.int32(nsteps), np.int32(use_weights), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1)) step += nsteps @@ -171,6 +175,7 @@ class GPUPhotons(object): t = ga.empty(shape=reduced_nphotons, dtype=np.float32) last_hit_triangles = ga.empty(shape=reduced_nphotons, dtype=np.int32) flags = ga.empty(shape=reduced_nphotons, dtype=np.uint32) + weights = ga.empty(shape=reduced_nphotons, dtype=np.float32) # And finaly copy photons, if there are any if reduced_nphotons > 0: @@ -181,12 +186,12 @@ class GPUPhotons(object): np.int32(photons_this_round), np.uint32(target_flag), index_counter_gpu, - self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, - pos, dir, wavelengths, pol, t, flags, last_hit_triangles, + self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, self.weights, + pos, dir, wavelengths, pol, t, flags, last_hit_triangles, weights, block=(nthreads_per_block,1,1), grid=(blocks, 1)) assert index_counter_gpu.get()[0] == reduced_nphotons - return GPUPhotonsSlice(pos, dir, pol, wavelengths, t, last_hit_triangles, flags) + return GPUPhotonsSlice(pos, dir, pol, wavelengths, t, last_hit_triangles, flags, weights) def __del__(self): del self.pos @@ -210,7 +215,7 @@ class GPUPhotonsSlice(GPUPhotons): Returned by the GPUPhotons.iterate_copies() iterator.''' def __init__(self, pos, dir, pol, wavelengths, t, last_hit_triangles, - flags): + flags, weights): '''Create new object using slices of GPUArrays from an instance of GPUPhotons. NOTE THESE ARE NOT CPU ARRAYS!''' self.pos = pos @@ -220,6 +225,7 @@ class GPUPhotonsSlice(GPUPhotons): self.t = t self.last_hit_triangles = last_hit_triangles self.flags = flags + self.weights = weights module = get_cu_module('propagate.cu', options=cuda_options) self.gpu_funcs = GPUFuncs(module) diff --git a/chroma/sim.py b/chroma/sim.py index a82f1f8..47f1820 100644 --- a/chroma/sim.py +++ b/chroma/sim.py @@ -127,7 +127,8 @@ class Simulation(object): def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, time_only=True): """Returns tuple: 1D array of channel hit counts, 1D array of PDF probability densities.""" - gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=ndaq) + ndaq_reps = ndaq // 32 + gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=32) self.gpu_pdf.setup_pdf_eval(event_channels.hit, event_channels.t, @@ -148,7 +149,8 @@ class Simulation(object): gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) gpu_photons.propagate(self.gpu_geometry, self.rng_states, nthreads_per_block=self.nthreads_per_block, - max_blocks=self.max_blocks) + max_blocks=self.max_blocks, + use_weights=True) nphotons = gpu_photons.true_nphotons for i in xrange(gpu_photons.ncopies): start_photon = i * nphotons @@ -157,8 +159,12 @@ class Simulation(object): nphotons=nphotons) if len(gpu_photon_slice) == 0: continue - gpu_channels = gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) - self.gpu_pdf.accumulate_pdf_eval(gpu_channels, 32) + + #weights = gpu_photon_slice.weights.get() + #print 'weights', weights.min(), weights.max() + for j in xrange(ndaq_reps): + gpu_channels = gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block=32) return self.gpu_pdf.get_pdf_eval() |