summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--chroma/cuda/daq.cu10
-rw-r--r--chroma/gpu/daq.py12
-rw-r--r--chroma/sim.py62
3 files changed, 58 insertions, 26 deletions
diff --git a/chroma/cuda/daq.cu b/chroma/cuda/daq.cu
index 719d928..e1ab10d 100644
--- a/chroma/cuda/daq.cu
+++ b/chroma/cuda/daq.cu
@@ -40,7 +40,8 @@ run_daq(curandState *s, unsigned int detection_state,
int *solid_map,
Detector *detector,
unsigned int *earliest_time_int,
- unsigned int *channel_q_int, unsigned int *channel_histories)
+ unsigned int *channel_q_int, unsigned int *channel_histories,
+ float global_weight)
{
int id = threadIdx.x + blockDim.x * blockIdx.x;
@@ -57,7 +58,7 @@ run_daq(curandState *s, unsigned int detection_state,
if (channel_index >= 0 && (history & detection_state)) {
- float weight = weights[photon_id];
+ float weight = weights[photon_id] * global_weight;
if (curand_uniform(&rng) < weight) {
float time = photon_times[photon_id] +
sample_cdf(&rng, detector->time_cdf_len,
@@ -93,7 +94,8 @@ run_daq_many(curandState *s, unsigned int detection_state,
Detector *detector,
unsigned int *earliest_time_int,
unsigned int *channel_q_int, unsigned int *channel_histories,
- int ndaq, int channel_stride)
+ int ndaq, int channel_stride,
+ float global_weight)
{
__shared__ int photon_id;
__shared__ int triangle_id;
@@ -112,7 +114,7 @@ run_daq_many(curandState *s, unsigned int detection_state,
history = photon_histories[photon_id];
channel_index = detector->solid_id_to_channel_index[solid_id];
photon_time = photon_times[photon_id];
- weight = weights[photon_id];
+ weight = weights[photon_id] * global_weight;
}
}
diff --git a/chroma/gpu/daq.py b/chroma/gpu/daq.py
index 10b4b00..44fb330 100644
--- a/chroma/gpu/daq.py
+++ b/chroma/gpu/daq.py
@@ -51,12 +51,13 @@ class GPUDaq(object):
self.ndaq = ndaq
self.stride = gpu_detector.nchannels
- def acquire(self, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024, start_photon=None, nphotons=None):
- self.gpu_funcs.reset_earliest_time_int(np.float32(1e9), np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, block=(64,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1))
+ def begin_acquire(self, nthreads_per_block=64):
+ self.gpu_funcs.reset_earliest_time_int(np.float32(1e9), np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, block=(nthreads_per_block,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1))
self.channel_q_int_gpu.fill(0)
self.channel_q_gpu.fill(0)
self.channel_history_gpu.fill(0)
+ def acquire(self, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024, start_photon=None, nphotons=None, weight=1.0):
if start_photon is None:
start_photon = 0
if nphotons is None:
@@ -71,7 +72,8 @@ class GPUDaq(object):
self.solid_id_map_gpu,
self.detector_gpu,
self.earliest_time_int_gpu,
- self.channel_q_int_gpu, self.channel_history_gpu,
+ self.channel_q_int_gpu, self.channel_history_gpu,
+ np.float32(weight),
block=(nthreads_per_block,1,1), grid=(blocks,1))
else:
for first_photon, photons_this_round, blocks in \
@@ -84,9 +86,11 @@ class GPUDaq(object):
self.earliest_time_int_gpu,
self.channel_q_int_gpu, self.channel_history_gpu,
np.int32(self.ndaq), np.int32(self.stride),
+ np.float32(weight),
block=(nthreads_per_block,1,1), grid=(blocks,1))
cuda.Context.get_current().synchronize()
-
+
+ def end_acquire(self, nthreads_per_block=64):
self.gpu_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, self.earliest_time_gpu, block=(nthreads_per_block,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1))
self.gpu_funcs.convert_charge_int_to_float(self.detector_gpu, self.channel_q_int_gpu, self.channel_q_gpu, block=(nthreads_per_block,1,1), grid=(len(self.channel_q_int_gpu)//nthreads_per_block+1,1))
diff --git a/chroma/sim.py b/chroma/sim.py
index 4af877b..06771c7 100644
--- a/chroma/sim.py
+++ b/chroma/sim.py
@@ -89,7 +89,9 @@ class Simulation(object):
# Skip running DAQ if we don't have one
if hasattr(self, 'gpu_daq') and run_daq:
- gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ self.gpu_daq.begin_acquire()
+ self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ gpu_channels = self.gpu_daq.end_acquire()
ev.channels = gpu_channels.get()
yield ev
@@ -118,18 +120,20 @@ class Simulation(object):
gpu_photons.propagate(self.gpu_geometry, self.rng_states,
nthreads_per_block=self.nthreads_per_block,
max_blocks=self.max_blocks)
- gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ self.gpu_daq.begin_acquire()
+ self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ gpu_channels = self.gpu_daq.end_acquire()
self.gpu_pdf.add_hits_to_pdf(gpu_channels)
return self.gpu_pdf.get_pdfs()
@profile_if_possible
- def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, time_only=True):
+ def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, nscatter=4, time_only=True):
"""Returns tuple: 1D array of channel hit counts, 1D array of PDF
probability densities."""
ndaq_per_rep = 64
ndaq_reps = ndaq // ndaq_per_rep
- gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=32)
+ gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=ndaq_per_rep)
self.gpu_pdf.setup_pdf_eval(event_channels.hit,
event_channels.t,
@@ -147,24 +151,42 @@ class Simulation(object):
iterable = self.photon_generator.generate_events(iterable)
for ev in iterable:
- gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
- gpu_photons.propagate(self.gpu_geometry, self.rng_states,
- nthreads_per_block=self.nthreads_per_block,
- max_blocks=self.max_blocks,
- use_weights=True)
- nphotons = gpu_photons.true_nphotons
- for i in xrange(gpu_photons.ncopies):
+ gpu_photons_no_scatter = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
+ gpu_photons_scatter = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps*nscatter)
+ gpu_photons_no_scatter.propagate(self.gpu_geometry, self.rng_states,
+ nthreads_per_block=self.nthreads_per_block,
+ max_blocks=self.max_blocks,
+ use_weights=True,
+ scatter_first=-1,
+ max_steps=10)
+ gpu_photons_scatter.propagate(self.gpu_geometry, self.rng_states,
+ nthreads_per_block=self.nthreads_per_block,
+ max_blocks=self.max_blocks,
+ use_weights=True,
+ scatter_first=1,
+ max_steps=5)
+ nphotons = gpu_photons_no_scatter.true_nphotons # same for scatter
+ for i in xrange(gpu_photons_no_scatter.ncopies):
start_photon = i * nphotons
- gpu_photon_slice = gpu_photons.select(event.SURFACE_DETECT,
- start_photon=start_photon,
- nphotons=nphotons)
- if len(gpu_photon_slice) == 0:
+ gpu_photon_no_scatter_slice = gpu_photons_no_scatter.select(event.SURFACE_DETECT,
+ start_photon=start_photon,
+ nphotons=nphotons)
+ gpu_photon_scatter_slices = [gpu_photons_scatter.select(event.SURFACE_DETECT,
+ start_photon=(nscatter*i+j)*nphotons,
+ nphotons=nphotons)
+ for j in xrange(nscatter)]
+
+ if len(gpu_photon_no_scatter_slice) == 0:
continue
#weights = gpu_photon_slice.weights.get()
#print 'weights', weights.min(), weights.max()
for j in xrange(ndaq_reps):
- gpu_channels = gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ gpu_daq.begin_acquire()
+ gpu_daq.acquire(gpu_photon_no_scatter_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ for scatter_slice in gpu_photon_scatter_slices:
+ gpu_daq.acquire(scatter_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks, weight=1.0/nscatter)
+ gpu_channels = gpu_daq.end_acquire()
self.gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block=ndaq_per_rep)
return self.gpu_pdf.get_pdf_eval()
@@ -189,7 +211,9 @@ class Simulation(object):
max_blocks=self.max_blocks)
for gpu_photon_slice in gpu_photons.iterate_copies():
for idaq in xrange(ndaq):
- gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ self.gpu_daq.begin_acquire()
+ self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ gpu_channels = self.gpu_daq.end_acquire()
self.gpu_pdf_kernel.accumulate_moments(gpu_channels)
self.gpu_pdf_kernel.compute_bandwidth(event_channels.hit,
@@ -220,7 +244,9 @@ class Simulation(object):
max_blocks=self.max_blocks)
for gpu_photon_slice in gpu_photons.iterate_copies():
for idaq in xrange(ndaq):
- gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ self.gpu_daq.begin_acquire()
+ self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+ gpu_channels = self.gpu_daq.end_acquire()
self.gpu_pdf_kernel.accumulate_kernel(gpu_channels)
return self.gpu_pdf_kernel.get_kernel_eval()