diff options
-rw-r--r-- | chroma/cuda/daq.cu | 10 | ||||
-rw-r--r-- | chroma/gpu/daq.py | 12 | ||||
-rw-r--r-- | chroma/sim.py | 62 |
3 files changed, 58 insertions, 26 deletions
diff --git a/chroma/cuda/daq.cu b/chroma/cuda/daq.cu index 719d928..e1ab10d 100644 --- a/chroma/cuda/daq.cu +++ b/chroma/cuda/daq.cu @@ -40,7 +40,8 @@ run_daq(curandState *s, unsigned int detection_state, int *solid_map, Detector *detector, unsigned int *earliest_time_int, - unsigned int *channel_q_int, unsigned int *channel_histories) + unsigned int *channel_q_int, unsigned int *channel_histories, + float global_weight) { int id = threadIdx.x + blockDim.x * blockIdx.x; @@ -57,7 +58,7 @@ run_daq(curandState *s, unsigned int detection_state, if (channel_index >= 0 && (history & detection_state)) { - float weight = weights[photon_id]; + float weight = weights[photon_id] * global_weight; if (curand_uniform(&rng) < weight) { float time = photon_times[photon_id] + sample_cdf(&rng, detector->time_cdf_len, @@ -93,7 +94,8 @@ run_daq_many(curandState *s, unsigned int detection_state, Detector *detector, unsigned int *earliest_time_int, unsigned int *channel_q_int, unsigned int *channel_histories, - int ndaq, int channel_stride) + int ndaq, int channel_stride, + float global_weight) { __shared__ int photon_id; __shared__ int triangle_id; @@ -112,7 +114,7 @@ run_daq_many(curandState *s, unsigned int detection_state, history = photon_histories[photon_id]; channel_index = detector->solid_id_to_channel_index[solid_id]; photon_time = photon_times[photon_id]; - weight = weights[photon_id]; + weight = weights[photon_id] * global_weight; } } diff --git a/chroma/gpu/daq.py b/chroma/gpu/daq.py index 10b4b00..44fb330 100644 --- a/chroma/gpu/daq.py +++ b/chroma/gpu/daq.py @@ -51,12 +51,13 @@ class GPUDaq(object): self.ndaq = ndaq self.stride = gpu_detector.nchannels - def acquire(self, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024, start_photon=None, nphotons=None): - self.gpu_funcs.reset_earliest_time_int(np.float32(1e9), np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, block=(64,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1)) + def begin_acquire(self, nthreads_per_block=64): + self.gpu_funcs.reset_earliest_time_int(np.float32(1e9), np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, block=(nthreads_per_block,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1)) self.channel_q_int_gpu.fill(0) self.channel_q_gpu.fill(0) self.channel_history_gpu.fill(0) + def acquire(self, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024, start_photon=None, nphotons=None, weight=1.0): if start_photon is None: start_photon = 0 if nphotons is None: @@ -71,7 +72,8 @@ class GPUDaq(object): self.solid_id_map_gpu, self.detector_gpu, self.earliest_time_int_gpu, - self.channel_q_int_gpu, self.channel_history_gpu, + self.channel_q_int_gpu, self.channel_history_gpu, + np.float32(weight), block=(nthreads_per_block,1,1), grid=(blocks,1)) else: for first_photon, photons_this_round, blocks in \ @@ -84,9 +86,11 @@ class GPUDaq(object): self.earliest_time_int_gpu, self.channel_q_int_gpu, self.channel_history_gpu, np.int32(self.ndaq), np.int32(self.stride), + np.float32(weight), block=(nthreads_per_block,1,1), grid=(blocks,1)) cuda.Context.get_current().synchronize() - + + def end_acquire(self, nthreads_per_block=64): self.gpu_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, self.earliest_time_gpu, block=(nthreads_per_block,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1)) self.gpu_funcs.convert_charge_int_to_float(self.detector_gpu, self.channel_q_int_gpu, self.channel_q_gpu, block=(nthreads_per_block,1,1), grid=(len(self.channel_q_int_gpu)//nthreads_per_block+1,1)) diff --git a/chroma/sim.py b/chroma/sim.py index 4af877b..06771c7 100644 --- a/chroma/sim.py +++ b/chroma/sim.py @@ -89,7 +89,9 @@ class Simulation(object): # Skip running DAQ if we don't have one if hasattr(self, 'gpu_daq') and run_daq: - gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_daq.begin_acquire() + self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_channels = self.gpu_daq.end_acquire() ev.channels = gpu_channels.get() yield ev @@ -118,18 +120,20 @@ class Simulation(object): gpu_photons.propagate(self.gpu_geometry, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) - gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_daq.begin_acquire() + self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_channels = self.gpu_daq.end_acquire() self.gpu_pdf.add_hits_to_pdf(gpu_channels) return self.gpu_pdf.get_pdfs() @profile_if_possible - def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, time_only=True): + def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, nscatter=4, time_only=True): """Returns tuple: 1D array of channel hit counts, 1D array of PDF probability densities.""" ndaq_per_rep = 64 ndaq_reps = ndaq // ndaq_per_rep - gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=32) + gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=ndaq_per_rep) self.gpu_pdf.setup_pdf_eval(event_channels.hit, event_channels.t, @@ -147,24 +151,42 @@ class Simulation(object): iterable = self.photon_generator.generate_events(iterable) for ev in iterable: - gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) - gpu_photons.propagate(self.gpu_geometry, self.rng_states, - nthreads_per_block=self.nthreads_per_block, - max_blocks=self.max_blocks, - use_weights=True) - nphotons = gpu_photons.true_nphotons - for i in xrange(gpu_photons.ncopies): + gpu_photons_no_scatter = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) + gpu_photons_scatter = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps*nscatter) + gpu_photons_no_scatter.propagate(self.gpu_geometry, self.rng_states, + nthreads_per_block=self.nthreads_per_block, + max_blocks=self.max_blocks, + use_weights=True, + scatter_first=-1, + max_steps=10) + gpu_photons_scatter.propagate(self.gpu_geometry, self.rng_states, + nthreads_per_block=self.nthreads_per_block, + max_blocks=self.max_blocks, + use_weights=True, + scatter_first=1, + max_steps=5) + nphotons = gpu_photons_no_scatter.true_nphotons # same for scatter + for i in xrange(gpu_photons_no_scatter.ncopies): start_photon = i * nphotons - gpu_photon_slice = gpu_photons.select(event.SURFACE_DETECT, - start_photon=start_photon, - nphotons=nphotons) - if len(gpu_photon_slice) == 0: + gpu_photon_no_scatter_slice = gpu_photons_no_scatter.select(event.SURFACE_DETECT, + start_photon=start_photon, + nphotons=nphotons) + gpu_photon_scatter_slices = [gpu_photons_scatter.select(event.SURFACE_DETECT, + start_photon=(nscatter*i+j)*nphotons, + nphotons=nphotons) + for j in xrange(nscatter)] + + if len(gpu_photon_no_scatter_slice) == 0: continue #weights = gpu_photon_slice.weights.get() #print 'weights', weights.min(), weights.max() for j in xrange(ndaq_reps): - gpu_channels = gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_daq.begin_acquire() + gpu_daq.acquire(gpu_photon_no_scatter_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + for scatter_slice in gpu_photon_scatter_slices: + gpu_daq.acquire(scatter_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks, weight=1.0/nscatter) + gpu_channels = gpu_daq.end_acquire() self.gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block=ndaq_per_rep) return self.gpu_pdf.get_pdf_eval() @@ -189,7 +211,9 @@ class Simulation(object): max_blocks=self.max_blocks) for gpu_photon_slice in gpu_photons.iterate_copies(): for idaq in xrange(ndaq): - gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_daq.begin_acquire() + self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_channels = self.gpu_daq.end_acquire() self.gpu_pdf_kernel.accumulate_moments(gpu_channels) self.gpu_pdf_kernel.compute_bandwidth(event_channels.hit, @@ -220,7 +244,9 @@ class Simulation(object): max_blocks=self.max_blocks) for gpu_photon_slice in gpu_photons.iterate_copies(): for idaq in xrange(ndaq): - gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_daq.begin_acquire() + self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_channels = self.gpu_daq.end_acquire() self.gpu_pdf_kernel.accumulate_kernel(gpu_channels) return self.gpu_pdf_kernel.get_kernel_eval() |