From 142b3c3caff164deb9bc7b2848e58e52387723ff Mon Sep 17 00:00:00 2001 From: Stan Seibert Date: Fri, 16 Sep 2011 15:02:02 -0400 Subject: Move CUDA source inside chroma package, rename tests directory to test --- chroma/cuda/__init__.py | 0 chroma/cuda/daq.cu | 198 ++++++++++++++++ chroma/cuda/geometry.h | 74 ++++++ chroma/cuda/hybrid_render.cu | 202 ++++++++++++++++ chroma/cuda/intersect.h | 141 ++++++++++++ chroma/cuda/linalg.h | 170 ++++++++++++++ chroma/cuda/matrix.h | 249 ++++++++++++++++++++ chroma/cuda/mesh.h | 175 ++++++++++++++ chroma/cuda/photon.h | 325 ++++++++++++++++++++++++++ chroma/cuda/physical_constants.h | 7 + chroma/cuda/propagate.cu | 147 ++++++++++++ chroma/cuda/random.h | 87 +++++++ chroma/cuda/render.cu | 171 ++++++++++++++ chroma/cuda/rotate.h | 27 +++ chroma/cuda/sorting.h | 107 +++++++++ chroma/cuda/tools.cu | 21 ++ chroma/cuda/transform.cu | 51 ++++ distribute_setup.py | 485 +++++++++++++++++++++++++++++++++++++++ src/__init__.py | 0 src/daq.cu | 198 ---------------- src/geometry.h | 74 ------ src/hybrid_render.cu | 202 ---------------- src/intersect.h | 141 ------------ src/linalg.h | 170 -------------- src/matrix.h | 249 -------------------- src/mesh.h | 175 -------------- src/photon.h | 325 -------------------------- src/physical_constants.h | 7 - src/propagate.cu | 147 ------------ src/random.h | 87 ------- src/render.cu | 171 -------------- src/rotate.h | 27 --- src/sorting.h | 107 --------- src/tools.cu | 21 -- src/transform.cu | 51 ---- test/__init__.py | 0 test/data/ray_intersection.npz | Bin 0 -> 1920080 bytes test/linalg_test.cu | 128 +++++++++++ test/linalg_test.py | 214 +++++++++++++++++ test/matrix_test.cu | 152 ++++++++++++ test/matrix_test.py | 327 ++++++++++++++++++++++++++ test/rotate_test.cu | 14 ++ test/rotate_test.py | 67 ++++++ test/test_fileio.py | 74 ++++++ test/test_generator_photon.py | 21 ++ test/test_generator_vertex.py | 23 ++ test/test_pdf.py | 67 ++++++ test/test_propagation.py | 58 +++++ test/test_ray_intersection.py | 27 +++ test/test_rayleigh.py | 57 +++++ test/test_sample_cdf.cu | 16 ++ test/test_sample_cdf.py | 64 ++++++ tests/__init__.py | 0 tests/data/ray_intersection.npz | Bin 1920080 -> 0 bytes tests/linalg_test.cu | 128 ----------- tests/linalg_test.py | 214 ----------------- tests/matrix_test.cu | 152 ------------ tests/matrix_test.py | 327 -------------------------- tests/rotate_test.cu | 14 -- tests/rotate_test.py | 67 ------ tests/test_fileio.py | 74 ------ tests/test_generator_photon.py | 21 -- tests/test_generator_vertex.py | 23 -- tests/test_pdf.py | 67 ------ tests/test_propagation.py | 58 ----- tests/test_ray_intersection.py | 27 --- tests/test_rayleigh.py | 57 ----- tests/test_sample_cdf.cu | 16 -- tests/test_sample_cdf.py | 64 ------ 69 files changed, 3946 insertions(+), 3461 deletions(-) create mode 100644 chroma/cuda/__init__.py create mode 100644 chroma/cuda/daq.cu create mode 100644 chroma/cuda/geometry.h create mode 100644 chroma/cuda/hybrid_render.cu create mode 100644 chroma/cuda/intersect.h create mode 100644 chroma/cuda/linalg.h create mode 100644 chroma/cuda/matrix.h create mode 100644 chroma/cuda/mesh.h create mode 100644 chroma/cuda/photon.h create mode 100644 chroma/cuda/physical_constants.h create mode 100644 chroma/cuda/propagate.cu create mode 100644 chroma/cuda/random.h create mode 100644 chroma/cuda/render.cu create mode 100644 chroma/cuda/rotate.h create mode 100644 chroma/cuda/sorting.h create mode 100644 chroma/cuda/tools.cu create mode 100644 chroma/cuda/transform.cu create mode 100644 distribute_setup.py delete mode 100644 src/__init__.py delete mode 100644 src/daq.cu delete mode 100644 src/geometry.h delete mode 100644 src/hybrid_render.cu delete mode 100644 src/intersect.h delete mode 100644 src/linalg.h delete mode 100644 src/matrix.h delete mode 100644 src/mesh.h delete mode 100644 src/photon.h delete mode 100644 src/physical_constants.h delete mode 100644 src/propagate.cu delete mode 100644 src/random.h delete mode 100644 src/render.cu delete mode 100644 src/rotate.h delete mode 100644 src/sorting.h delete mode 100644 src/tools.cu delete mode 100644 src/transform.cu create mode 100644 test/__init__.py create mode 100644 test/data/ray_intersection.npz create mode 100644 test/linalg_test.cu create mode 100644 test/linalg_test.py create mode 100644 test/matrix_test.cu create mode 100644 test/matrix_test.py create mode 100644 test/rotate_test.cu create mode 100644 test/rotate_test.py create mode 100644 test/test_fileio.py create mode 100644 test/test_generator_photon.py create mode 100644 test/test_generator_vertex.py create mode 100644 test/test_pdf.py create mode 100644 test/test_propagation.py create mode 100644 test/test_ray_intersection.py create mode 100644 test/test_rayleigh.py create mode 100644 test/test_sample_cdf.cu create mode 100644 test/test_sample_cdf.py delete mode 100644 tests/__init__.py delete mode 100644 tests/data/ray_intersection.npz delete mode 100644 tests/linalg_test.cu delete mode 100644 tests/linalg_test.py delete mode 100644 tests/matrix_test.cu delete mode 100644 tests/matrix_test.py delete mode 100644 tests/rotate_test.cu delete mode 100644 tests/rotate_test.py delete mode 100644 tests/test_fileio.py delete mode 100644 tests/test_generator_photon.py delete mode 100644 tests/test_generator_vertex.py delete mode 100644 tests/test_pdf.py delete mode 100644 tests/test_propagation.py delete mode 100644 tests/test_ray_intersection.py delete mode 100644 tests/test_rayleigh.py delete mode 100644 tests/test_sample_cdf.cu delete mode 100644 tests/test_sample_cdf.py diff --git a/chroma/cuda/__init__.py b/chroma/cuda/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chroma/cuda/daq.cu b/chroma/cuda/daq.cu new file mode 100644 index 0000000..5a68846 --- /dev/null +++ b/chroma/cuda/daq.cu @@ -0,0 +1,198 @@ +// -*-c++-*- +#include + +__device__ unsigned int +float_to_sortable_int(float f) +{ + return __float_as_int(f); + //int i = __float_as_int(f); + //unsigned int mask = -(int)(i >> 31) | 0x80000000; + //return i ^ mask; +} + +__device__ float +sortable_int_to_float(unsigned int i) +{ + return __int_as_float(i); + //unsigned int mask = ((i >> 31) - 1) | 0x80000000; + //return __int_as_float(i ^ mask); +} + +extern "C" +{ + +__global__ void +reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + if (id < ntime_ints) { + unsigned int maxtime_int = float_to_sortable_int(maxtime); + time_ints[id] = maxtime_int; + } +} + +__global__ void +run_daq(curandState *s, unsigned int detection_state, float time_rms, + int first_photon, int nphotons, float *photon_times, + unsigned int *photon_histories, int *last_hit_triangles, + int *solid_map, int nsolids, unsigned int *earliest_time_int, + unsigned int *channel_q, unsigned int *channel_histories) +{ + + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < nphotons) { + curandState rng = s[id]; + int photon_id = id + first_photon; + int triangle_id = last_hit_triangles[photon_id]; + + if (triangle_id > -1) { + int solid_id = solid_map[triangle_id]; + unsigned int history = photon_histories[photon_id]; + + if (solid_id < nsolids && (history & detection_state)) { + float time = photon_times[photon_id] + curand_normal(&rng) * time_rms; + unsigned int time_int = float_to_sortable_int(time); + + atomicMin(earliest_time_int + solid_id, time_int); + atomicAdd(channel_q + solid_id, 1); + atomicOr(channel_histories + solid_id, history); + } + + } + + s[id] = rng; + + } + +} + +__global__ void +convert_sortable_int_to_float(int n, unsigned int *sortable_ints, + float *float_output) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < n) + float_output[id] = sortable_int_to_float(sortable_ints[id]); +} + +__global__ void +bin_hits(int nchannels, unsigned int *channel_q, float *channel_time, + unsigned int *hitcount, int tbins, float tmin, float tmax, int qbins, + float qmin, float qmax, unsigned int *pdf) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id >= nchannels) + return; + + unsigned int q = channel_q[id]; + float t = channel_time[id]; + + if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) { + hitcount[id] += 1; + + int tbin = (t - tmin) / (tmax - tmin) * tbins; + int qbin = (q - qmin) / (qmax - qmin) * qbins; + + // row major order (channel, t, q) + int bin = id * (tbins * qbins) + tbin * qbins + qbin; + pdf[bin] += 1; + } +} + +__global__ void +accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit, + float *event_time, float *event_charge, float *mc_time, + unsigned int *mc_charge, // quirk of DAQ! + unsigned int *hitcount, unsigned int *bincount, + float min_twidth, float tmin, float tmax, + float min_qwidth, float qmin, float qmax, + float *nearest_mc, int min_bin_content) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id >= nchannels) + return; + + // Was this channel hit in the Monte Carlo? + float channel_mc_time = mc_time[id]; + if (channel_mc_time >= 1e8) + return; // Nothing else to do + + // Is this channel inside the range of the PDF? + float distance; + int channel_bincount = bincount[id]; + if (time_only) { + if (channel_mc_time < tmin || channel_mc_time > tmax) + return; // Nothing else to do + + // This MC information is contained in the PDF + hitcount[id] += 1; + + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel + + // Are we inside the minimum size bin? + float channel_event_time = event_time[id]; + distance = fabsf(channel_mc_time - channel_event_time); + if (distance < min_twidth/2.0) + bincount[id] = channel_bincount + 1; + + } + else { // time and charge PDF + float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer + + if (channel_mc_time < tmin || channel_mc_time > tmax || + channel_mc_charge < qmin || channel_mc_charge > qmax) + return; // Nothing else to do + + // This MC information is contained in the PDF + hitcount[id] += 1; + + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel + + // Are we inside the minimum size bin? + float channel_event_time = event_time[id]; + float channel_event_charge = event_charge[id]; + float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0; + float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0; + distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance); + + if (distance < 1.0f) + bincount[id] = channel_bincount + 1; + } + + // Do we need to keep updating the nearest_mc list? + if (channel_bincount + 1 >= min_bin_content) + return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value + + // insertion sort the distance into the array of nearest MC points + int offset = min_bin_content * id; + int entry = min_bin_content - 1; + + // If last entry less than new entry, nothing to update + if (distance > nearest_mc[offset + entry]) + return; + + // Find where to insert the new entry while sliding the rest + // to the right + entry--; + while (entry >= 0) { + if (nearest_mc[offset+entry] >= distance) + nearest_mc[offset+entry+1] = nearest_mc[offset+entry]; + else + break; + entry--; + } + + nearest_mc[offset+entry+1] = distance; +} + +} // extern "C" diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h new file mode 100644 index 0000000..2b5eacb --- /dev/null +++ b/chroma/cuda/geometry.h @@ -0,0 +1,74 @@ +#ifndef __GEOMETRY_H__ +#define __GEOMETRY_H__ + +struct Material +{ + float *refractive_index; + float *absorption_length; + float *scattering_length; + unsigned int n; + float step; + float wavelength0; +}; + +struct Surface +{ + float *detect; + float *absorb; + float *reflect_diffuse; + float *reflect_specular; + unsigned int n; + float step; + float wavelength0; +}; + +struct Triangle +{ + float3 v0, v1, v2; +}; + +struct Geometry +{ + float3 *vertices; + uint3 *triangles; + unsigned int *material_codes; + unsigned int *colors; + float3 *lower_bounds; + float3 *upper_bounds; + unsigned int *node_map; + unsigned int *node_map_end; + Material **materials; + Surface **surfaces; + unsigned int start_node; + unsigned int first_node; +}; + +__device__ Triangle +get_triangle(Geometry *geometry, const unsigned int &i) +{ + uint3 triangle_data = geometry->triangles[i]; + + Triangle triangle; + triangle.v0 = geometry->vertices[triangle_data.x]; + triangle.v1 = geometry->vertices[triangle_data.y]; + triangle.v2 = geometry->vertices[triangle_data.z]; + + return triangle; +} + +template +__device__ float +interp_property(T *m, const float &x, const float *fp) +{ + if (x < m->wavelength0) + return fp[0]; + + if (x > (m->wavelength0 + (m->n-1)*m->step)) + return fp[m->n-1]; + + int jl = (x-m->wavelength0)/m->step; + + return fp[jl] + (x-(m->wavelength0 + jl*m->step))*(fp[jl+1]-fp[jl])/m->step; +} + +#endif diff --git a/chroma/cuda/hybrid_render.cu b/chroma/cuda/hybrid_render.cu new file mode 100644 index 0000000..29edefa --- /dev/null +++ b/chroma/cuda/hybrid_render.cu @@ -0,0 +1,202 @@ +//-*-c-*- +#include +#include + +#include "linalg.h" +#include "matrix.h" +#include "rotate.h" +#include "mesh.h" +#include "geometry.h" +#include "photon.h" + +__device__ void +fAtomicAdd(float *addr, float data) +{ + while (data) + data = atomicExch(addr, data+atomicExch(addr, 0.0f)); +} + +__device__ void +to_diffuse(Photon &p, State &s, Geometry *g, curandState &rng, int max_steps) +{ + int steps = 0; + while (steps < max_steps) { + steps++; + + int command; + + fill_state(s, p, g); + + if (p.last_hit_triangle == -1) + break; + + command = propagate_to_boundary(p, s, rng); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + + if (s.surface_index != -1) { + command = propagate_at_surface(p, s, rng, g); + + if (p.history & REFLECT_DIFFUSE) + break; + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + } + + propagate_at_boundary(p, s, rng); + + } // while (steps < max_steps) + +} // to_diffuse + +extern "C" +{ + +__global__ void +update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, + curandState *rng_states, float wavelength, float3 xyz, + float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps, + Geometry *g) +{ + int kernel_id = blockIdx.x*blockDim.x + threadIdx.x; + int id = kernel_id + offset; + + if (kernel_id >= nthreads || id >= total_threads) + return; + + curandState rng = rng_states[kernel_id]; + + Triangle t = get_triangle(g, id); + + float a = curand_uniform(&rng); + float b = uniform(&rng, 0.0f, (1.0f - a)); + float c = 1.0f - a - b; + + float3 direction = a*t.v0 + b*t.v1 + c*t.v2 - position; + direction /= norm(direction); + + float distance; + int triangle_index = intersect_mesh(position, direction, g, distance); + + if (triangle_index != id) { + rng_states[kernel_id] = rng; + return; + } + + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + float3 surface_normal = normalize(cross(v01,v12)); + + float cos_theta = dot(surface_normal,-direction); + + if (cos_theta < 0.0f) + cos_theta = dot(-surface_normal,-direction); + + Photon p; + p.position = position; + p.direction = direction; + p.wavelength = wavelength; + p.polarization = uniform_sphere(&rng); + p.last_hit_triangle = -1; + p.time = 0; + p.history = 0; + + State s; + to_diffuse(p, s, g, rng, max_steps); + + if (p.history & REFLECT_DIFFUSE) { + if (s.inside_to_outside) { + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x); + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y); + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z); + } + else { + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x); + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y); + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z); + } + } + + rng_states[kernel_id] = rng; + +} // update_xyz_lookup + +__global__ void +update_xyz_image(int nthreads, curandState *rng_states, float3 *positions, + float3 *directions, float wavelength, float3 xyz, + float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image, + int nlookup_calls, int max_steps, Geometry *g) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curandState rng = rng_states[id]; + + Photon p; + p.position = positions[id]; + p.direction = directions[id]; + p.direction /= norm(p.direction); + p.wavelength = wavelength; + p.polarization = uniform_sphere(&rng); + p.last_hit_triangle = -1; + p.time = 0; + p.history = 0; + + State s; + to_diffuse(p, s, g, rng, max_steps); + + if (p.history & REFLECT_DIFFUSE) { + if (s.inside_to_outside) + image[id] += xyz*xyz_lookup1[p.last_hit_triangle]/nlookup_calls; + else + image[id] += xyz*xyz_lookup2[p.last_hit_triangle]/nlookup_calls; + } + + rng_states[id] = rng; + +} // update_xyz_image + +__global__ void +process_image(int nthreads, float3 *image, unsigned int *pixels, int nimages) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + float3 rgb = image[id]/nimages; + + if (rgb.x < 0.0f) + rgb.x = 0.0f; + if (rgb.y < 0.0f) + rgb.y = 0.0f; + if (rgb.z < 0.0f) + rgb.z = 0.0f; + + if (rgb.x > 1.0f) + rgb.x = 1.0f; + if (rgb.y > 1.0f) + rgb.y = 1.0f; + if (rgb.z > 1.0f) + rgb.z = 1.0f; + + unsigned int r = floorf(rgb.x*255.0f); + unsigned int g = floorf(rgb.y*255.0f); + unsigned int b = floorf(rgb.z*255.0f); + + pixels[id] = 255 << 24 | r << 16 | g << 8 | b; + +} // process_image + +} // extern "c" diff --git a/chroma/cuda/intersect.h b/chroma/cuda/intersect.h new file mode 100644 index 0000000..978bde8 --- /dev/null +++ b/chroma/cuda/intersect.h @@ -0,0 +1,141 @@ +//-*-c-*- + +#ifndef __INTERSECT_H__ +#define __INTERSECT_H__ + +#include "linalg.h" +#include "matrix.h" +#include "geometry.h" + +#define EPSILON 0.0f + +/* Tests the intersection between a ray and a triangle. + If the ray intersects the triangle, set `distance` to the distance from + `origin` to the intersection and return true, else return false. + `direction` must be normalized to one. */ +__device__ bool +intersect_triangle(const float3 &origin, const float3 &direction, + const Triangle &triangle, float &distance) +{ + float3 m1 = triangle.v1-triangle.v0; + float3 m2 = triangle.v2-triangle.v0; + float3 m3 = -direction; + + Matrix m = make_matrix(m1, m2, m3); + + float determinant = det(m); + + if (determinant == 0.0f) + return false; + + float3 b = origin-triangle.v0; + + float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + + (m.a02*m.a21 - m.a01*m.a22)*b.y + + (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant; + + if (u1 < -EPSILON || u1 > 1.0f) + return false; + + float u2 = ((m.a12*m.a20 - m.a10*m.a22)*b.x + + (m.a00*m.a22 - m.a02*m.a20)*b.y + + (m.a02*m.a10 - m.a00*m.a12)*b.z)/determinant; + + if (u2 < -EPSILON || u2 > 1.0f) + return false; + + float u3 = ((m.a10*m.a21 - m.a11*m.a20)*b.x + + (m.a01*m.a20 - m.a00*m.a21)*b.y + + (m.a00*m.a11 - m.a01*m.a10)*b.z)/determinant; + + if (u3 <= 0.0f || (1.0f-u1-u2) < -EPSILON) + return false; + + distance = u3; + + return true; +} + +/* Tests the intersection between a ray and an axis-aligned box defined by + an upper and lower bound. If the ray intersects the box, set + `distance_to_box` to the distance from `origin` to the intersection and + return true, else return false. `direction` must be normalized to one. + + Source: "An Efficient and Robust Ray-Box Intersection Algorithm." + by Williams, et. al. */ +__device__ bool +intersect_box(const float3 &origin, const float3 &direction, + const float3 &lower_bound, const float3 &upper_bound, + float& distance_to_box) +{ + float kmin, kmax, kymin, kymax, kzmin, kzmax; + + float divx = 1.0f/direction.x; + if (divx >= 0.0f) + { + kmin = (lower_bound.x - origin.x)*divx; + kmax = (upper_bound.x - origin.x)*divx; + } + else + { + kmin = (upper_bound.x - origin.x)*divx; + kmax = (lower_bound.x - origin.x)*divx; + } + + if (kmax < 0.0f) + return false; + + float divy = 1.0f/direction.y; + if (divy >= 0.0f) + { + kymin = (lower_bound.y - origin.y)*divy; + kymax = (upper_bound.y - origin.y)*divy; + } + else + { + kymin = (upper_bound.y - origin.y)*divy; + kymax = (lower_bound.y - origin.y)*divy; + } + + if (kymax < 0.0f) + return false; + + if (kymin > kmin) + kmin = kymin; + + if (kymax < kmax) + kmax = kymax; + + if (kmin > kmax) + return false; + + float divz = 1.0f/direction.z; + if (divz >= 0.0f) + { + kzmin = (lower_bound.z - origin.z)*divz; + kzmax = (upper_bound.z - origin.z)*divz; + } + else + { + kzmin = (upper_bound.z - origin.z)*divz; + kzmax = (lower_bound.z - origin.z)*divz; + } + + if (kzmax < 0.0f) + return false; + + if (kzmin > kmin) + kmin = kzmin; + + if (kzmax < kmax) + kmax = kzmax; + + if (kmin > kmax) + return false; + + distance_to_box = kmin; + + return true; +} + +#endif diff --git a/chroma/cuda/linalg.h b/chroma/cuda/linalg.h new file mode 100644 index 0000000..35b2423 --- /dev/null +++ b/chroma/cuda/linalg.h @@ -0,0 +1,170 @@ +#ifndef __LINALG_H__ +#define __LINALG_H__ + +__device__ float3 +operator- (const float3 &a) +{ + return make_float3(-a.x, -a.y, -a.z); +} + +__device__ float3 +operator* (const float3 &a, const float3 &b) +{ + return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); +} + +__device__ float3 +operator/ (const float3 &a, const float3 &b) +{ + return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); +} + +__device__ void +operator*= (float3 &a, const float3 &b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; +} + +__device__ void +operator/= (float3 &a, const float3 &b) +{ + a.x /= b.x; + a.y /= b.y; + a.z /= b.z; +} + +__device__ float3 +operator+ (const float3 &a, const float3 &b) +{ + return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); +} + +__device__ void +operator+= (float3 &a, const float3 &b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; +} + +__device__ float3 +operator- (const float3 &a, const float3 &b) +{ + return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); +} + +__device__ void +operator-= (float3 &a, const float3 &b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; +} + +__device__ float3 +operator+ (const float3 &a, const float &c) +{ + return make_float3(a.x+c, a.y+c, a.z+c); +} + +__device__ void +operator+= (float3 &a, const float &c) +{ + a.x += c; + a.y += c; + a.z += c; +} + +__device__ float3 +operator+ (const float &c, const float3 &a) +{ + return make_float3(c+a.x, c+a.y, c+a.z); +} + +__device__ float3 +operator- (const float3 &a, const float &c) +{ + return make_float3(a.x-c, a.y-c, a.z-c); +} + +__device__ void +operator-= (float3 &a, const float &c) +{ + a.x -= c; + a.y -= c; + a.z -= c; +} + +__device__ float3 +operator- (const float &c, const float3& a) +{ + return make_float3(c-a.x, c-a.y, c-a.z); +} + +__device__ float3 +operator* (const float3 &a, const float &c) +{ + return make_float3(a.x*c, a.y*c, a.z*c); +} + +__device__ void +operator*= (float3 &a, const float &c) +{ + a.x *= c; + a.y *= c; + a.z *= c; +} + +__device__ float3 +operator* (const float &c, const float3& a) +{ + return make_float3(c*a.x, c*a.y, c*a.z); +} + +__device__ float3 +operator/ (const float3 &a, const float &c) +{ + return make_float3(a.x/c, a.y/c, a.z/c); +} + +__device__ void +operator/= (float3 &a, const float &c) +{ + a.x /= c; + a.y /= c; + a.z /= c; +} + +__device__ float3 +operator/ (const float &c, const float3 &a) +{ + return make_float3(c/a.x, c/a.y, c/a.z); +} + +__device__ float +dot(const float3 &a, const float3 &b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + +__device__ float3 +cross(const float3 &a, const float3 &b) +{ + return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); +} + +__device__ float +norm(const float3 &a) +{ + return sqrtf(dot(a,a)); +} + +__device__ float3 +normalize(const float3 &a) +{ + return a/norm(a); +} + +#endif diff --git a/chroma/cuda/matrix.h b/chroma/cuda/matrix.h new file mode 100644 index 0000000..0a66e58 --- /dev/null +++ b/chroma/cuda/matrix.h @@ -0,0 +1,249 @@ +#ifndef __MATRIX_H__ +#define __MATRIX_H__ + +struct Matrix +{ + float a00, a01, a02, a10, a11, a12, a20, a21, a22; +}; + +__device__ Matrix +make_matrix(float a00, float a01, float a02, + float a10, float a11, float a12, + float a20, float a21, float a22) +{ + Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22}; + return m; +} + +__device__ Matrix +make_matrix(const float3 &u1, const float3 &u2, const float3 &u3) +{ + Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z}; + return m; +} + +__device__ Matrix +operator- (const Matrix &m) +{ + return make_matrix(-m.a00, -m.a01, -m.a02, + -m.a10, -m.a11, -m.a12, + -m.a20, -m.a21, -m.a22); +} + +__device__ float3 +operator* (const Matrix &m, const float3 &a) +{ + return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z, + m.a10*a.x + m.a11*a.y + m.a12*a.z, + m.a20*a.x + m.a21*a.y + m.a22*a.z); +} + +__device__ Matrix +operator+ (const Matrix &m, const Matrix &n) +{ + return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02, + m.a10+n.a10, m.a11+n.a11, m.a12+n.a12, + m.a20+n.a20, m.a21+n.a21, m.a22+n.a22); +} + +__device__ void +operator+= (Matrix &m, const Matrix &n) +{ + m.a00 += n.a00; + m.a01 += n.a01; + m.a02 += n.a02; + m.a10 += n.a10; + m.a11 += n.a11; + m.a12 += n.a12; + m.a20 += n.a20; + m.a21 += n.a21; + m.a22 += n.a22; +} + +__device__ Matrix +operator- (const Matrix &m, const Matrix &n) +{ + return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02, + m.a10-n.a10, m.a11-n.a11, m.a12-n.a12, + m.a20-n.a20, m.a21-n.a21, m.a22-n.a22); +} + +__device__ void +operator-= (Matrix &m, const Matrix& n) +{ + m.a00 -= n.a00; + m.a01 -= n.a01; + m.a02 -= n.a02; + m.a10 -= n.a10; + m.a11 -= n.a11; + m.a12 -= n.a12; + m.a20 -= n.a20; + m.a21 -= n.a21; + m.a22 -= n.a22; +} + +__device__ Matrix +operator* (const Matrix &m, const Matrix &n) +{ + return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20, + m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21, + m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22, + m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20, + m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21, + m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22, + m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20, + m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21, + m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22); +} + +__device__ Matrix +operator+ (const Matrix &m, const float &c) +{ + return make_matrix(m.a00+c, m.a01+c, m.a02+c, + m.a10+c, m.a11+c, m.a12+c, + m.a20+c, m.a21+c, m.a22+c); +} + +__device__ void +operator+= (Matrix &m, const float &c) +{ + m.a00 += c; + m.a01 += c; + m.a02 += c; + m.a10 += c; + m.a11 += c; + m.a12 += c; + m.a20 += c; + m.a21 += c; + m.a22 += c; +} + +__device__ Matrix +operator+ (const float &c, const Matrix &m) +{ + return make_matrix(c+m.a00, c+m.a01, c+m.a02, + c+m.a10, c+m.a11, c+m.a12, + c+m.a20, c+m.a21, c+m.a22); +} + +__device__ Matrix +operator- (const Matrix &m, const float &c) +{ + return make_matrix(m.a00-c, m.a01-c, m.a02-c, + m.a10-c, m.a11-c, m.a12-c, + m.a20-c, m.a21-c, m.a22-c); +} + +__device__ void +operator-= (Matrix &m, const float &c) +{ + m.a00 -= c; + m.a01 -= c; + m.a02 -= c; + m.a10 -= c; + m.a11 -= c; + m.a12 -= c; + m.a20 -= c; + m.a21 -= c; + m.a22 -= c; +} + +__device__ Matrix +operator- (const float &c, const Matrix &m) +{ + return make_matrix(c-m.a00, c-m.a01, c-m.a02, + c-m.a10, c-m.a11, c-m.a12, + c-m.a20, c-m.a21, c-m.a22); +} + +__device__ Matrix +operator* (const Matrix &m, const float &c) +{ + return make_matrix(m.a00*c, m.a01*c, m.a02*c, + m.a10*c, m.a11*c, m.a12*c, + m.a20*c, m.a21*c, m.a22*c); +} + +__device__ void +operator*= (Matrix &m, const float &c) +{ + m.a00 *= c; + m.a01 *= c; + m.a02 *= c; + m.a10 *= c; + m.a11 *= c; + m.a12 *= c; + m.a20 *= c; + m.a21 *= c; + m.a22 *= c; +} + +__device__ Matrix +operator* (const float &c, const Matrix &m) +{ + return make_matrix(c*m.a00, c*m.a01, c*m.a02, + c*m.a10, c*m.a11, c*m.a12, + c*m.a20, c*m.a21, c*m.a22); +} + +__device__ Matrix +operator/ (const Matrix &m, const float &c) +{ + return make_matrix(m.a00/c, m.a01/c, m.a02/c, + m.a10/c, m.a11/c, m.a12/c, + m.a20/c, m.a21/c, m.a22/c); +} + +__device__ void +operator/= (Matrix &m, const float &c) +{ + m.a00 /= c; + m.a01 /= c; + m.a02 /= c; + m.a10 /= c; + m.a11 /= c; + m.a12 /= c; + m.a20 /= c; + m.a21 /= c; + m.a22 /= c; +} + +__device__ Matrix +operator/ (const float &c, const Matrix &m) +{ + return make_matrix(c/m.a00, c/m.a01, c/m.a02, + c/m.a10, c/m.a11, c/m.a12, + c/m.a20, c/m.a21, c/m.a22); +} + +__device__ float +det(const Matrix &m) +{ + return m.a00*(m.a11*m.a22 - m.a12*m.a21) - + m.a10*(m.a01*m.a22 - m.a02*m.a21) + + m.a20*(m.a01*m.a12 - m.a02*m.a11); +} + +__device__ Matrix +inv(const Matrix &m) +{ + return make_matrix(m.a11*m.a22 - m.a12*m.a21, + m.a02*m.a21 - m.a01*m.a22, + m.a01*m.a12 - m.a02*m.a11, + m.a12*m.a20 - m.a10*m.a22, + m.a00*m.a22 - m.a02*m.a20, + m.a02*m.a10 - m.a00*m.a12, + m.a10*m.a21 - m.a11*m.a20, + m.a01*m.a20 - m.a00*m.a21, + m.a00*m.a11 - m.a01*m.a10)/det(m); +} + +__device__ Matrix +outer(const float3 &a, const float3 &b) +{ + return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z, + a.y*b.x, a.y*b.y, a.y*b.z, + a.z*b.x, a.z*b.y, a.z*b.z); +} + +#endif diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h new file mode 100644 index 0000000..d60d801 --- /dev/null +++ b/chroma/cuda/mesh.h @@ -0,0 +1,175 @@ +#ifndef __MESH_H__ +#define __MESH_H__ + +#include "intersect.h" +#include "geometry.h" + +#include "stdio.h" + +#define STACK_SIZE 100 + +/* Tests the intersection between a ray and a node in the bounding volume + hierarchy. If the ray intersects the bounding volume and `min_distance` + is less than zero or the distance from `origin` to the intersection is + less than `min_distance`, return true, else return false. */ +__device__ bool +intersect_node(const float3 &origin, const float3 &direction, + Geometry *g, int i, float min_distance=-1.0f) +{ + /* assigning these to local variables is faster for some reason */ + float3 lower_bound = g->lower_bounds[i]; + float3 upper_bound = g->upper_bounds[i]; + + float distance_to_box; + + if (intersect_box(origin, direction, lower_bound, upper_bound, + distance_to_box)) { + if (min_distance < 0.0f) + return true; + + if (distance_to_box > min_distance) + return false; + + return true; + } + else { + return false; + } + +} + +/* Finds the intersection between a ray and `geometry`. If the ray does + intersect the mesh and the index of the intersected triangle is not equal + to `last_hit_triangle`, set `min_distance` to the distance from `origin` to + the intersection and return the index of the triangle which the ray + intersected, else return -1. */ +__device__ int +intersect_mesh(const float3 &origin, const float3& direction, Geometry *g, + float &min_distance, int last_hit_triangle = -1) +{ + int triangle_index = -1; + + float distance; + min_distance = -1.0f; + + if (!intersect_node(origin, direction, g, g->start_node, min_distance)) + return -1; + + unsigned int stack[STACK_SIZE]; + + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; + *node = g->start_node; + + unsigned int i; + + do + { + unsigned int first_child = g->node_map[*node]; + unsigned int stop = g->node_map_end[*node]; + + while (*node >= g->first_node && stop == first_child+1) { + *node = first_child; + first_child = g->node_map[*node]; + stop = g->node_map_end[*node]; + } + + if (*node >= g->first_node) { + for (i=first_child; i < stop; i++) { + if (intersect_node(origin, direction, g, i, min_distance)) { + *node = i; + node++; + } + } + + node--; + } + else { + // node is a leaf + for (i=first_child; i < stop; i++) { + if (last_hit_triangle == i) + continue; + + Triangle t = get_triangle(g, i); + + if (intersect_triangle(origin, direction, t, distance)) { + if (triangle_index == -1) { + triangle_index = i; + min_distance = distance; + continue; + } + + if (distance < min_distance) { + triangle_index = i; + min_distance = distance; + } + } + } // triangle loop + + node--; + + } // node is a leaf + + if (node > tail) { + printf("warning: intersect_mesh() aborted; node > tail\n"); + break; + } + + } // while loop + while (node != head); + + return triangle_index; +} + +extern "C" +{ + +__global__ void +distance_to_mesh(int nthreads, float3 *_origin, float3 *_direction, + Geometry *g, float *_distance) +{ + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + g = &sg; + + float3 origin = _origin[id]; + float3 direction = _direction[id]; + direction /= norm(direction); + + float distance; + + int triangle_index = intersect_mesh(origin, direction, g, distance); + + if (triangle_index != -1) + _distance[id] = distance; +} + +__global__ void +color_solids(int first_triangle, int nthreads, int *solid_id_map, + bool *solid_hit, unsigned int *solid_colors, Geometry *g) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + int triangle_id = first_triangle + id; + int solid_id = solid_id_map[triangle_id]; + if (solid_hit[solid_id]) + g->colors[triangle_id] = solid_colors[solid_id]; +} + +} // extern "C" + +#endif diff --git a/chroma/cuda/photon.h b/chroma/cuda/photon.h new file mode 100644 index 0000000..8b7e588 --- /dev/null +++ b/chroma/cuda/photon.h @@ -0,0 +1,325 @@ +#ifndef __PHOTON_H__ +#define __PHOTON_H__ + +#include "stdio.h" +#include "linalg.h" +#include "rotate.h" +#include "random.h" +#include "physical_constants.h" +#include "mesh.h" +#include "geometry.h" + +struct Photon +{ + float3 position; + float3 direction; + float3 polarization; + float wavelength; + float time; + + unsigned int history; + + int last_hit_triangle; +}; + +struct State +{ + bool inside_to_outside; + + float3 surface_normal; + + float refractive_index1, refractive_index2; + float absorption_length; + float scattering_length; + + int surface_index; + + float distance_to_boundary; +}; + +enum +{ + NO_HIT = 0x1 << 0, + BULK_ABSORB = 0x1 << 1, + SURFACE_DETECT = 0x1 << 2, + SURFACE_ABSORB = 0x1 << 3, + RAYLEIGH_SCATTER = 0x1 << 4, + REFLECT_DIFFUSE = 0x1 << 5, + REFLECT_SPECULAR = 0x1 << 6, + NAN_ABORT = 0x1 << 31 +}; // processes + +enum {BREAK, CONTINUE, PASS}; // return value from propagate_to_boundary + +__device__ int +convert(int c) +{ + if (c & 0x80) + return (0xFFFFFF00 | c); + else + return c; +} + +__device__ float +get_theta(const float3 &a, const float3 &b) +{ + return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b)))); +} + +__device__ void +fill_state(State &s, Photon &p, Geometry *g) +{ + p.last_hit_triangle = intersect_mesh(p.position, p.direction, g, + s.distance_to_boundary, + p.last_hit_triangle); + + if (p.last_hit_triangle == -1) { + p.history |= NO_HIT; + return; + } + + Triangle t = get_triangle(g, p.last_hit_triangle); + + unsigned int material_code = g->material_codes[p.last_hit_triangle]; + + int inner_material_index = convert(0xFF & (material_code >> 24)); + int outer_material_index = convert(0xFF & (material_code >> 16)); + s.surface_index = convert(0xFF & (material_code >> 8)); + + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + s.surface_normal = normalize(cross(v01, v12)); + + Material *material1, *material2; + if (dot(s.surface_normal,-p.direction) > 0.0f) { + // outside to inside + material1 = g->materials[outer_material_index]; + material2 = g->materials[inner_material_index]; + + s.inside_to_outside = false; + } + else { + // inside to outside + material1 = g->materials[inner_material_index]; + material2 = g->materials[outer_material_index]; + s.surface_normal = -s.surface_normal; + + s.inside_to_outside = true; + } + + s.refractive_index1 = interp_property(material1, p.wavelength, + material1->refractive_index); + s.refractive_index2 = interp_property(material2, p.wavelength, + material2->refractive_index); + s.absorption_length = interp_property(material1, p.wavelength, + material1->absorption_length); + s.scattering_length = interp_property(material1, p.wavelength, + material1->scattering_length); + +} // fill_state + +__device__ float3 +pick_new_direction(float3 axis, float theta, float phi) +{ + // Taken from SNOMAN rayscatter.for + float cos_theta = cosf(theta); + float sin_theta = sinf(theta); + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + + float sin_axis_theta = sqrt(1.0f - axis.z*axis.z); + float cos_axis_phi, sin_axis_phi; + + if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) { + cos_axis_phi = 1.0f; + sin_axis_phi = 0.0f; + } + else { + cos_axis_phi = axis.x / sin_axis_theta; + sin_axis_phi = axis.y / sin_axis_theta; + } + + float dirx = cos_theta*axis.x + + sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi); + float diry = cos_theta*axis.y + + sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi); + float dirz = cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta; + + return make_float3(dirx, diry, dirz); +} + +__device__ void +rayleigh_scatter(Photon &p, curandState &rng) +{ + float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f); + if (cos_theta > 1.0f) + cos_theta = 1.0f; + else if (cos_theta < -1.0f) + cos_theta = -1.0f; + + float theta = acosf(cos_theta); + float phi = uniform(&rng, 0.0f, 2.0f * PI); + + p.direction = pick_new_direction(p.polarization, theta, phi); + + if (1.0f - fabsf(cos_theta) < 1e-6f) { + p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi); + } + else { + // linear combination of old polarization and new direction + p.polarization = p.polarization - cos_theta * p.direction; + } + + p.direction /= norm(p.direction); + p.polarization /= norm(p.polarization); +} // scatter + +__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) +{ + float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); + float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); + + if (absorption_distance <= scattering_distance) { + if (absorption_distance <= s.distance_to_boundary) { + p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += absorption_distance*p.direction; + p.history |= BULK_ABSORB; + + p.last_hit_triangle = -1; + + return BREAK; + } // photon is absorbed in material1 + } + else { + if (scattering_distance <= s.distance_to_boundary) { + p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += scattering_distance*p.direction; + + rayleigh_scatter(p, rng); + + p.history |= RAYLEIGH_SCATTER; + + p.last_hit_triangle = -1; + + return CONTINUE; + } // photon is scattered in material1 + } // if scattering_distance < absorption_distance + + p.position += s.distance_to_boundary*p.direction; + p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1); + + return PASS; + +} // propagate_to_boundary + +__device__ void +propagate_at_boundary(Photon &p, State &s, curandState &rng) +{ + float incident_angle = get_theta(s.surface_normal,-p.direction); + float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2); + + float3 incident_plane_normal = cross(p.direction, s.surface_normal); + float incident_plane_normal_length = norm(incident_plane_normal); + + // Photons at normal incidence do not have a unique plane of incidence, + // so we have to pick the plane normal to be the polarization vector + // to get the correct logic below + if (incident_plane_normal_length < 1e-6f) + incident_plane_normal = p.polarization; + else + incident_plane_normal /= incident_plane_normal_length; + + float normal_coefficient = dot(p.polarization, incident_plane_normal); + float normal_probability = normal_coefficient*normal_coefficient; + + float reflection_coefficient; + if (curand_uniform(&rng) < normal_probability) { + // photon polarization normal to plane of incidence + reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle); + + if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { + p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + + p.history |= REFLECT_SPECULAR; + } + else { + p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); + } + + p.polarization = incident_plane_normal; + } + else { + // photon polarization parallel to plane of incidence + reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle); + + if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { + p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + + p.history |= REFLECT_SPECULAR; + } + else { + p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); + } + + p.polarization = cross(incident_plane_normal, p.direction); + p.polarization /= norm(p.polarization); + } + +} // propagate_at_boundary + +__device__ int +propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry) +{ + Surface *surface = geometry->surfaces[s.surface_index]; + + /* since the surface properties are interpolated linearly, we are + guaranteed that they still sum to 1.0. */ + + float detect = interp_property(surface, p.wavelength, surface->detect); + float absorb = interp_property(surface, p.wavelength, surface->absorb); + float reflect_diffuse = interp_property(surface, p.wavelength, surface->reflect_diffuse); + float reflect_specular = interp_property(surface, p.wavelength, surface->reflect_specular); + + float uniform_sample = curand_uniform(&rng); + + if (uniform_sample < absorb) { + p.history |= SURFACE_ABSORB; + return BREAK; + } + else if (uniform_sample < absorb + detect) { + p.history |= SURFACE_DETECT; + return BREAK; + } + else if (uniform_sample < absorb + detect + reflect_diffuse) { + // diffusely reflect + p.direction = uniform_sphere(&rng); + + if (dot(p.direction, s.surface_normal) < 0.0f) + p.direction = -p.direction; + + // randomize polarization? + p.polarization = cross(uniform_sphere(&rng), p.direction); + p.polarization /= norm(p.polarization); + + p.history |= REFLECT_DIFFUSE; + + return CONTINUE; + } + else { + // specularly reflect + float incident_angle = get_theta(s.surface_normal,-p.direction); + float3 incident_plane_normal = cross(p.direction, s.surface_normal); + incident_plane_normal /= norm(incident_plane_normal); + + p.direction = rotate(s.surface_normal, incident_angle, + incident_plane_normal); + + p.history |= REFLECT_SPECULAR; + + return CONTINUE; + } + +} // propagate_at_surface + +#endif diff --git a/chroma/cuda/physical_constants.h b/chroma/cuda/physical_constants.h new file mode 100644 index 0000000..2ff87cd --- /dev/null +++ b/chroma/cuda/physical_constants.h @@ -0,0 +1,7 @@ +#ifndef __PHYSICAL_CONSTANTS_H__ +#define __PHYSICAL_CONSTANTS_H__ + +#define SPEED_OF_LIGHT 2.99792458e8 +#define PI 3.141592653589793 + +#endif diff --git a/chroma/cuda/propagate.cu b/chroma/cuda/propagate.cu new file mode 100644 index 0000000..fdcf532 --- /dev/null +++ b/chroma/cuda/propagate.cu @@ -0,0 +1,147 @@ +//-*-c-*- + +#include "linalg.h" +#include "geometry.h" +#include "photon.h" + +#include "stdio.h" + +extern "C" +{ + +__global__ void +photon_duplicate(int first_photon, int nthreads, + float3 *positions, float3 *directions, + float *wavelengths, float3 *polarizations, + float *times, unsigned int *histories, + int *last_hit_triangles, + int copies, int stride) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + int photon_id = first_photon + id; + + Photon p; + p.position = positions[photon_id]; + p.direction = directions[photon_id]; + p.polarization = polarizations[photon_id]; + p.wavelength = wavelengths[photon_id]; + p.time = times[photon_id]; + p.last_hit_triangle = last_hit_triangles[photon_id]; + p.history = histories[photon_id]; + + for (int i=1; i <= copies; i++) { + int target_photon_id = photon_id + stride * i; + + positions[target_photon_id] = p.position; + directions[target_photon_id] = p.direction; + polarizations[target_photon_id] = p.polarization; + wavelengths[target_photon_id] = p.wavelength; + times[target_photon_id] = p.time; + last_hit_triangles[target_photon_id] = p.last_hit_triangle; + histories[target_photon_id] = p.history; + } +} + +__global__ void +propagate(int first_photon, int nthreads, unsigned int *input_queue, + unsigned int *output_queue, curandState *rng_states, + float3 *positions, float3 *directions, + float *wavelengths, float3 *polarizations, + float *times, unsigned int *histories, + int *last_hit_triangles, int max_steps, + Geometry *g) +{ + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + g = &sg; + + curandState rng = rng_states[id]; + + int photon_id = input_queue[first_photon + id]; + + Photon p; + p.position = positions[photon_id]; + p.direction = directions[photon_id]; + p.direction /= norm(p.direction); + p.polarization = polarizations[photon_id]; + p.polarization /= norm(p.polarization); + p.wavelength = wavelengths[photon_id]; + p.time = times[photon_id]; + p.last_hit_triangle = last_hit_triangles[photon_id]; + p.history = histories[photon_id]; + + if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) + return; + + State s; + + int steps = 0; + while (steps < max_steps) { + steps++; + + int command; + + // check for NaN and fail + if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z)) { + p.history |= NO_HIT | NAN_ABORT; + break; + } + + fill_state(s, p, g); + + if (p.last_hit_triangle == -1) + break; + + command = propagate_to_boundary(p, s, rng); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + + if (s.surface_index != -1) { + command = propagate_at_surface(p, s, rng, g); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + } + + propagate_at_boundary(p, s, rng); + + } // while (steps < max_steps) + + rng_states[id] = rng; + positions[photon_id] = p.position; + directions[photon_id] = p.direction; + polarizations[photon_id] = p.polarization; + wavelengths[photon_id] = p.wavelength; + times[photon_id] = p.time; + histories[photon_id] = p.history; + last_hit_triangles[photon_id] = p.last_hit_triangle; + + // Not done, put photon in output queue + if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) { + int out_idx = atomicAdd(output_queue, 1); + output_queue[out_idx] = photon_id; + } +} // propagate + +} // extern "C" diff --git a/chroma/cuda/random.h b/chroma/cuda/random.h new file mode 100644 index 0000000..e9d2e75 --- /dev/null +++ b/chroma/cuda/random.h @@ -0,0 +1,87 @@ +#ifndef __RANDOM_H__ +#define __RANDOM_H__ + +#include + +#include "physical_constants.h" + +__device__ float +uniform(curandState *s, const float &low, const float &high) +{ + return low + curand_uniform(s)*(high-low); +} + +__device__ float3 +uniform_sphere(curandState *s) +{ + float theta = uniform(s, 0.0f, 2*PI); + float u = uniform(s, -1.0f, 1.0f); + float c = sqrtf(1.0f-u*u); + + return make_float3(c*cosf(theta), c*sinf(theta), u); +} + +// Draw a random sample given a cumulative distribution function +// Assumptions: ncdf >= 2, cdf_y[0] is 0.0, and cdf_y[ncdf-1] is 1.0 +__device__ float +sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y) +{ + float u = curand_uniform(rng); + + // Find u in cdf_y with binary search + // list must contain at least 2 elements: 0.0 and 1.0 + int lower=0; + int upper=ncdf-1; + while(lower < upper-1) { + int half = (lower+upper) / 2; + if (u < cdf_y[half]) + upper = half; + else + lower = half; + } + + float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); + return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac); +} + +extern "C" +{ + +__global__ void +init_rng(int nthreads, curandState *s, unsigned long long seed, + unsigned long long offset) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curand_init(seed, id, offset, &s[id]); +} + +__global__ void +fill_uniform(int nthreads, curandState *s, float *a, float low, float high) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = uniform(&s[id], low, high); + +} + +__global__ void +fill_uniform_sphere(int nthreads, curandState *s, float3 *a) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = uniform_sphere(&s[id]); +} + +} // extern "c" + +#endif diff --git a/chroma/cuda/render.cu b/chroma/cuda/render.cu new file mode 100644 index 0000000..d4ed443 --- /dev/null +++ b/chroma/cuda/render.cu @@ -0,0 +1,171 @@ +//-*-c-*- + +#include "linalg.h" +#include "intersect.h" +#include "mesh.h" +#include "sorting.h" +#include "geometry.h" + +#include "stdio.h" + +__device__ float4 +get_color(const float3 &direction, const Triangle &t, unsigned int rgba) +{ + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + float3 surface_normal = normalize(cross(v01,v12)); + + float cos_theta = dot(surface_normal,-direction); + + if (cos_theta < 0.0f) + cos_theta = -cos_theta; + + unsigned int a0 = 0xff & (rgba >> 24); + unsigned int r0 = 0xff & (rgba >> 16); + unsigned int g0 = 0xff & (rgba >> 8); + unsigned int b0 = 0xff & rgba; + + float alpha = (255 - a0)/255.0f; + + return make_float4(r0*cos_theta, g0*cos_theta, b0*cos_theta, alpha); +} + +extern "C" +{ + +__global__ void +render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g, + unsigned int alpha_depth, unsigned int *pixels, float *_dx, + unsigned int *dxlen, float4 *_color) +{ + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + g = &sg; + + float3 origin = _origin[id]; + float3 direction = _direction[id]; + unsigned int n = dxlen[id]; + + float distance; + + if (n < 1 && !intersect_node(origin, direction, g, g->start_node)) { + pixels[id] = 0; + return; + } + + unsigned int stack[STACK_SIZE]; + + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; + *node = g->start_node; + + float *dx = _dx + id*alpha_depth; + float4 *color_a = _color + id*alpha_depth; + + unsigned int i; + + do { + unsigned int first_child = g->node_map[*node]; + unsigned int stop = g->node_map_end[*node]; + + while (*node >= g->first_node && stop == first_child+1) { + *node = first_child; + first_child = g->node_map[*node]; + stop = g->node_map_end[*node]; + } + + if (*node >= g->first_node) { + for (i=first_child; i < stop; i++) { + if (intersect_node(origin, direction, g, i)) { + *node = i; + node++; + } + } + + node--; + } + else { + // node is a leaf + for (i=first_child; i < stop; i++) { + Triangle t = get_triangle(g, i); + + if (intersect_triangle(origin, direction, t, distance)) { + if (n < 1) { + dx[0] = distance; + + unsigned int rgba = g->colors[i]; + float4 color = get_color(direction, t, rgba); + + color_a[0] = color; + } + else { + unsigned long j = searchsorted(n, dx, distance); + + if (j <= alpha_depth-1) { + insert(alpha_depth, dx, j, distance); + + unsigned int rgba = g->colors[i]; + float4 color = get_color(direction, t, rgba); + + insert(alpha_depth, color_a, j, color); + } + } + + if (n < alpha_depth) + n++; + } + + } // triangle loop + + node--; + + } // node is a leaf + + } // while loop + while (node != head); + + if (n < 1) { + pixels[id] = 0; + return; + } + + dxlen[id] = n; + + float scale = 1.0f; + float fr = 0.0f; + float fg = 0.0f; + float fb = 0.0f; + for (i=0; i < n; i++) { + float alpha = color_a[i].w; + + fr += scale*color_a[i].x*alpha; + fg += scale*color_a[i].y*alpha; + fb += scale*color_a[i].z*alpha; + + scale *= (1.0f-alpha); + } + unsigned int a; + if (n < alpha_depth) + a = floorf(255*(1.0f-scale)); + else + a = 255; + unsigned int red = floorf(fr/(1.0f-scale)); + unsigned int green = floorf(fg/(1.0f-scale)); + unsigned int blue = floorf(fb/(1.0f-scale)); + + pixels[id] = a << 24 | red << 16 | green << 8 | blue; +} + +} // extern "C" diff --git a/chroma/cuda/rotate.h b/chroma/cuda/rotate.h new file mode 100644 index 0000000..15f8037 --- /dev/null +++ b/chroma/cuda/rotate.h @@ -0,0 +1,27 @@ +#ifndef __ROTATE_H__ +#define __ROTATE_H__ + +#include "linalg.h" +#include "matrix.h" + +__device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1}; + +__device__ Matrix +make_rotation_matrix(float phi, const float3 &n) +{ + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + + return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) + + sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0); +} + +/* rotate points counterclockwise, when looking towards +infinity, + through an angle `phi` about the axis `n`. */ +__device__ float3 +rotate(const float3 &a, float phi, const float3 &n) +{ + return make_rotation_matrix(phi,n)*a; +} + +#endif diff --git a/chroma/cuda/sorting.h b/chroma/cuda/sorting.h new file mode 100644 index 0000000..2bf1a5a --- /dev/null +++ b/chroma/cuda/sorting.h @@ -0,0 +1,107 @@ +template +__device__ void +swap(T &a, T &b) +{ + T tmp = a; + a = b; + b = tmp; +} + +template +__device__ void +reverse(int n, T *a) +{ + for (int i=0; i < n/2; i++) + swap(a[i],a[n-1-i]); +} + +template +__device__ void +piksrt(int n, T *arr) +{ + int i,j; + T a; + + for (j=1; j < n; j++) { + a = arr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) { + arr[i+1] = arr[i]; + i--; + } + arr[i+1] = a; + } +} + +template +__device__ void +piksrt2(int n, T *arr, U *brr) +{ + int i,j; + T a; + U b; + + for (j=1; j < n; j++) { + a = arr[j]; + b = brr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) { + arr[i+1] = arr[i]; + brr[i+1] = brr[i]; + i--; + } + arr[i+1] = a; + brr[i+1] = b; + } +} + +/* Returns the index in `arr` where `x` should be inserted in order to + maintain order. If `n` equals one, return the index such that, when + `x` is inserted, `arr` will be in ascending order. +*/ +template +__device__ unsigned long +searchsorted(unsigned long n, T *arr, const T &x) +{ + unsigned long ju,jm,jl; + int ascnd; + + jl = 0; + ju = n; + + ascnd = (arr[n-1] >= arr[0]); + + while (ju-jl > 1) { + jm = (ju+jl) >> 1; + + if ((x > arr[jm]) == ascnd) + jl = jm; + else + ju = jm; + } + + if ((x <= arr[0]) == ascnd) + return 0; + else + return ju; +} + +template +__device__ void +insert(unsigned long n, T *arr, unsigned long i, const T &x) +{ + unsigned long j; + for (j=n-1; j > i; j--) + arr[j] = arr[j-1]; + arr[i] = x; +} + +template +__device__ void +add_sorted(unsigned long n, T *arr, const T &x) +{ + unsigned long i = searchsorted(n, arr, x); + + if (i < n) + insert(n, arr, i, x); +} diff --git a/chroma/cuda/tools.cu b/chroma/cuda/tools.cu new file mode 100644 index 0000000..80d06ec --- /dev/null +++ b/chroma/cuda/tools.cu @@ -0,0 +1,21 @@ +//--*-c-*- + +extern "C" +{ + +__global__ void +interleave(int nthreads, unsigned long long *x, unsigned long long *y, + unsigned long long *z, int bits, unsigned long long *dest) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + for (int i=0; i < bits; i++) + dest[id] |= (x[id] & 1 << i) << (2*i) | + (y[id] & 1 << i) << (2*i+1) | + (z[id] & 1 << i) << (2*i+2); +} + +} // extern "C" diff --git a/chroma/cuda/transform.cu b/chroma/cuda/transform.cu new file mode 100644 index 0000000..1f4405e --- /dev/null +++ b/chroma/cuda/transform.cu @@ -0,0 +1,51 @@ +//-*-c-*- + +#include "linalg.h" +#include "rotate.h" + +extern "C" +{ + +/* Translate the points `a` by the vector `v` */ +__global__ void +translate(int nthreads, float3 *a, float3 v) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] += v; +} + +/* Rotate the points `a` through an angle `phi` counter-clockwise about the + axis `axis` (when looking towards +infinity). */ +__global__ void +rotate(int nthreads, float3 *a, float phi, float3 axis) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = rotate(a[id], phi, axis); +} + +/* Rotate the points `a` through an angle `phi` counter-clockwise + (when looking towards +infinity along `axis`) about the axis defined + by the point `point` and the vector `axis` . */ +__global__ void +rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, + float3 point) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] -= point; + a[id] = rotate(a[id], phi, axis); + a[id] += point; +} + +} // extern "c" diff --git a/distribute_setup.py b/distribute_setup.py new file mode 100644 index 0000000..5a95c92 --- /dev/null +++ b/distribute_setup.py @@ -0,0 +1,485 @@ +#!python +"""Bootstrap distribute installation + +If you want to use setuptools in your package's setup.py, just include this +file in the same directory with it, and add this to the top of your setup.py:: + + from distribute_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. +""" +import os +import sys +import time +import fnmatch +import tempfile +import tarfile +from distutils import log + +try: + from site import USER_SITE +except ImportError: + USER_SITE = None + +try: + import subprocess + + def _python_cmd(*args): + args = (sys.executable,) + args + return subprocess.call(args) == 0 + +except ImportError: + # will be used for python 2.3 + def _python_cmd(*args): + args = (sys.executable,) + args + # quoting arguments if windows + if sys.platform == 'win32': + def quote(arg): + if ' ' in arg: + return '"%s"' % arg + return arg + args = [quote(arg) for arg in args] + return os.spawnl(os.P_WAIT, sys.executable, *args) == 0 + +DEFAULT_VERSION = "0.6.21" +DEFAULT_URL = "http://pypi.python.org/packages/source/d/distribute/" +SETUPTOOLS_FAKED_VERSION = "0.6c11" + +SETUPTOOLS_PKG_INFO = """\ +Metadata-Version: 1.0 +Name: setuptools +Version: %s +Summary: xxxx +Home-page: xxx +Author: xxx +Author-email: xxx +License: xxx +Description: xxx +""" % SETUPTOOLS_FAKED_VERSION + + +def _install(tarball): + # extracting the tarball + tmpdir = tempfile.mkdtemp() + log.warn('Extracting in %s', tmpdir) + old_wd = os.getcwd() + try: + os.chdir(tmpdir) + tar = tarfile.open(tarball) + _extractall(tar) + tar.close() + + # going in the directory + subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) + os.chdir(subdir) + log.warn('Now working in %s', subdir) + + # installing + log.warn('Installing Distribute') + if not _python_cmd('setup.py', 'install'): + log.warn('Something went wrong during the installation.') + log.warn('See the error message above.') + finally: + os.chdir(old_wd) + + +def _build_egg(egg, tarball, to_dir): + # extracting the tarball + tmpdir = tempfile.mkdtemp() + log.warn('Extracting in %s', tmpdir) + old_wd = os.getcwd() + try: + os.chdir(tmpdir) + tar = tarfile.open(tarball) + _extractall(tar) + tar.close() + + # going in the directory + subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) + os.chdir(subdir) + log.warn('Now working in %s', subdir) + + # building an egg + log.warn('Building a Distribute egg in %s', to_dir) + _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) + + finally: + os.chdir(old_wd) + # returning the result + log.warn(egg) + if not os.path.exists(egg): + raise IOError('Could not build the egg.') + + +def _do_download(version, download_base, to_dir, download_delay): + egg = os.path.join(to_dir, 'distribute-%s-py%d.%d.egg' + % (version, sys.version_info[0], sys.version_info[1])) + if not os.path.exists(egg): + tarball = download_setuptools(version, download_base, + to_dir, download_delay) + _build_egg(egg, tarball, to_dir) + sys.path.insert(0, egg) + import setuptools + setuptools.bootstrap_install_from = egg + + +def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, + to_dir=os.curdir, download_delay=15, no_fake=True): + # making sure we use the absolute path + to_dir = os.path.abspath(to_dir) + was_imported = 'pkg_resources' in sys.modules or \ + 'setuptools' in sys.modules + try: + try: + import pkg_resources + if not hasattr(pkg_resources, '_distribute'): + if not no_fake: + _fake_setuptools() + raise ImportError + except ImportError: + return _do_download(version, download_base, to_dir, download_delay) + try: + pkg_resources.require("distribute>="+version) + return + except pkg_resources.VersionConflict: + e = sys.exc_info()[1] + if was_imported: + sys.stderr.write( + "The required version of distribute (>=%s) is not available,\n" + "and can't be installed while this script is running. Please\n" + "install a more recent version first, using\n" + "'easy_install -U distribute'." + "\n\n(Currently using %r)\n" % (version, e.args[0])) + sys.exit(2) + else: + del pkg_resources, sys.modules['pkg_resources'] # reload ok + return _do_download(version, download_base, to_dir, + download_delay) + except pkg_resources.DistributionNotFound: + return _do_download(version, download_base, to_dir, + download_delay) + finally: + if not no_fake: + _create_fake_setuptools_pkg_info(to_dir) + +def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, + to_dir=os.curdir, delay=15): + """Download distribute from a specified location and return its filename + + `version` should be a valid distribute version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download + attempt. + """ + # making sure we use the absolute path + to_dir = os.path.abspath(to_dir) + try: + from urllib.request import urlopen + except ImportError: + from urllib2 import urlopen + tgz_name = "distribute-%s.tar.gz" % version + url = download_base + tgz_name + saveto = os.path.join(to_dir, tgz_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + log.warn("Downloading %s", url) + src = urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. + data = src.read() + dst = open(saveto, "wb") + dst.write(data) + finally: + if src: + src.close() + if dst: + dst.close() + return os.path.realpath(saveto) + +def _no_sandbox(function): + def __no_sandbox(*args, **kw): + try: + from setuptools.sandbox import DirectorySandbox + if not hasattr(DirectorySandbox, '_old'): + def violation(*args): + pass + DirectorySandbox._old = DirectorySandbox._violation + DirectorySandbox._violation = violation + patched = True + else: + patched = False + except ImportError: + patched = False + + try: + return function(*args, **kw) + finally: + if patched: + DirectorySandbox._violation = DirectorySandbox._old + del DirectorySandbox._old + + return __no_sandbox + +def _patch_file(path, content): + """Will backup the file then patch it""" + existing_content = open(path).read() + if existing_content == content: + # already patched + log.warn('Already patched.') + return False + log.warn('Patching...') + _rename_path(path) + f = open(path, 'w') + try: + f.write(content) + finally: + f.close() + return True + +_patch_file = _no_sandbox(_patch_file) + +def _same_content(path, content): + return open(path).read() == content + +def _rename_path(path): + new_name = path + '.OLD.%s' % time.time() + log.warn('Renaming %s into %s', path, new_name) + os.rename(path, new_name) + return new_name + +def _remove_flat_installation(placeholder): + if not os.path.isdir(placeholder): + log.warn('Unkown installation at %s', placeholder) + return False + found = False + for file in os.listdir(placeholder): + if fnmatch.fnmatch(file, 'setuptools*.egg-info'): + found = True + break + if not found: + log.warn('Could not locate setuptools*.egg-info') + return + + log.warn('Removing elements out of the way...') + pkg_info = os.path.join(placeholder, file) + if os.path.isdir(pkg_info): + patched = _patch_egg_dir(pkg_info) + else: + patched = _patch_file(pkg_info, SETUPTOOLS_PKG_INFO) + + if not patched: + log.warn('%s already patched.', pkg_info) + return False + # now let's move the files out of the way + for element in ('setuptools', 'pkg_resources.py', 'site.py'): + element = os.path.join(placeholder, element) + if os.path.exists(element): + _rename_path(element) + else: + log.warn('Could not find the %s element of the ' + 'Setuptools distribution', element) + return True + +_remove_flat_installation = _no_sandbox(_remove_flat_installation) + +def _after_install(dist): + log.warn('After install bootstrap.') + placeholder = dist.get_command_obj('install').install_purelib + _create_fake_setuptools_pkg_info(placeholder) + +def _create_fake_setuptools_pkg_info(placeholder): + if not placeholder or not os.path.exists(placeholder): + log.warn('Could not find the install location') + return + pyver = '%s.%s' % (sys.version_info[0], sys.version_info[1]) + setuptools_file = 'setuptools-%s-py%s.egg-info' % \ + (SETUPTOOLS_FAKED_VERSION, pyver) + pkg_info = os.path.join(placeholder, setuptools_file) + if os.path.exists(pkg_info): + log.warn('%s already exists', pkg_info) + return + + log.warn('Creating %s', pkg_info) + f = open(pkg_info, 'w') + try: + f.write(SETUPTOOLS_PKG_INFO) + finally: + f.close() + + pth_file = os.path.join(placeholder, 'setuptools.pth') + log.warn('Creating %s', pth_file) + f = open(pth_file, 'w') + try: + f.write(os.path.join(os.curdir, setuptools_file)) + finally: + f.close() + +_create_fake_setuptools_pkg_info = _no_sandbox(_create_fake_setuptools_pkg_info) + +def _patch_egg_dir(path): + # let's check if it's already patched + pkg_info = os.path.join(path, 'EGG-INFO', 'PKG-INFO') + if os.path.exists(pkg_info): + if _same_content(pkg_info, SETUPTOOLS_PKG_INFO): + log.warn('%s already patched.', pkg_info) + return False + _rename_path(path) + os.mkdir(path) + os.mkdir(os.path.join(path, 'EGG-INFO')) + pkg_info = os.path.join(path, 'EGG-INFO', 'PKG-INFO') + f = open(pkg_info, 'w') + try: + f.write(SETUPTOOLS_PKG_INFO) + finally: + f.close() + return True + +_patch_egg_dir = _no_sandbox(_patch_egg_dir) + +def _before_install(): + log.warn('Before install bootstrap.') + _fake_setuptools() + + +def _under_prefix(location): + if 'install' not in sys.argv: + return True + args = sys.argv[sys.argv.index('install')+1:] + for index, arg in enumerate(args): + for option in ('--root', '--prefix'): + if arg.startswith('%s=' % option): + top_dir = arg.split('root=')[-1] + return location.startswith(top_dir) + elif arg == option: + if len(args) > index: + top_dir = args[index+1] + return location.startswith(top_dir) + if arg == '--user' and USER_SITE is not None: + return location.startswith(USER_SITE) + return True + + +def _fake_setuptools(): + log.warn('Scanning installed packages') + try: + import pkg_resources + except ImportError: + # we're cool + log.warn('Setuptools or Distribute does not seem to be installed.') + return + ws = pkg_resources.working_set + try: + setuptools_dist = ws.find(pkg_resources.Requirement.parse('setuptools', + replacement=False)) + except TypeError: + # old distribute API + setuptools_dist = ws.find(pkg_resources.Requirement.parse('setuptools')) + + if setuptools_dist is None: + log.warn('No setuptools distribution found') + return + # detecting if it was already faked + setuptools_location = setuptools_dist.location + log.warn('Setuptools installation detected at %s', setuptools_location) + + # if --root or --preix was provided, and if + # setuptools is not located in them, we don't patch it + if not _under_prefix(setuptools_location): + log.warn('Not patching, --root or --prefix is installing Distribute' + ' in another location') + return + + # let's see if its an egg + if not setuptools_location.endswith('.egg'): + log.warn('Non-egg installation') + res = _remove_flat_installation(setuptools_location) + if not res: + return + else: + log.warn('Egg installation') + pkg_info = os.path.join(setuptools_location, 'EGG-INFO', 'PKG-INFO') + if (os.path.exists(pkg_info) and + _same_content(pkg_info, SETUPTOOLS_PKG_INFO)): + log.warn('Already patched.') + return + log.warn('Patching...') + # let's create a fake egg replacing setuptools one + res = _patch_egg_dir(setuptools_location) + if not res: + return + log.warn('Patched done.') + _relaunch() + + +def _relaunch(): + log.warn('Relaunching...') + # we have to relaunch the process + # pip marker to avoid a relaunch bug + if sys.argv[:3] == ['-c', 'install', '--single-version-externally-managed']: + sys.argv[0] = 'setup.py' + args = [sys.executable] + sys.argv + sys.exit(subprocess.call(args)) + + +def _extractall(self, path=".", members=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). + """ + import copy + import operator + from tarfile import ExtractError + directories = [] + + if members is None: + members = self + + for tarinfo in members: + if tarinfo.isdir(): + # Extract directories with a safe mode. + directories.append(tarinfo) + tarinfo = copy.copy(tarinfo) + tarinfo.mode = 448 # decimal for oct 0700 + self.extract(tarinfo, path) + + # Reverse sort directories. + if sys.version_info < (2, 4): + def sorter(dir1, dir2): + return cmp(dir1.name, dir2.name) + directories.sort(sorter) + directories.reverse() + else: + directories.sort(key=operator.attrgetter('name'), reverse=True) + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: + dirpath = os.path.join(path, tarinfo.name) + try: + self.chown(tarinfo, dirpath) + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError: + e = sys.exc_info()[1] + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + tarball = download_setuptools() + _install(tarball) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/daq.cu b/src/daq.cu deleted file mode 100644 index 5a68846..0000000 --- a/src/daq.cu +++ /dev/null @@ -1,198 +0,0 @@ -// -*-c++-*- -#include - -__device__ unsigned int -float_to_sortable_int(float f) -{ - return __float_as_int(f); - //int i = __float_as_int(f); - //unsigned int mask = -(int)(i >> 31) | 0x80000000; - //return i ^ mask; -} - -__device__ float -sortable_int_to_float(unsigned int i) -{ - return __int_as_float(i); - //unsigned int mask = ((i >> 31) - 1) | 0x80000000; - //return __int_as_float(i ^ mask); -} - -extern "C" -{ - -__global__ void -reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints) -{ - int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id < ntime_ints) { - unsigned int maxtime_int = float_to_sortable_int(maxtime); - time_ints[id] = maxtime_int; - } -} - -__global__ void -run_daq(curandState *s, unsigned int detection_state, float time_rms, - int first_photon, int nphotons, float *photon_times, - unsigned int *photon_histories, int *last_hit_triangles, - int *solid_map, int nsolids, unsigned int *earliest_time_int, - unsigned int *channel_q, unsigned int *channel_histories) -{ - - int id = threadIdx.x + blockDim.x * blockIdx.x; - - if (id < nphotons) { - curandState rng = s[id]; - int photon_id = id + first_photon; - int triangle_id = last_hit_triangles[photon_id]; - - if (triangle_id > -1) { - int solid_id = solid_map[triangle_id]; - unsigned int history = photon_histories[photon_id]; - - if (solid_id < nsolids && (history & detection_state)) { - float time = photon_times[photon_id] + curand_normal(&rng) * time_rms; - unsigned int time_int = float_to_sortable_int(time); - - atomicMin(earliest_time_int + solid_id, time_int); - atomicAdd(channel_q + solid_id, 1); - atomicOr(channel_histories + solid_id, history); - } - - } - - s[id] = rng; - - } - -} - -__global__ void -convert_sortable_int_to_float(int n, unsigned int *sortable_ints, - float *float_output) -{ - int id = threadIdx.x + blockDim.x * blockIdx.x; - - if (id < n) - float_output[id] = sortable_int_to_float(sortable_ints[id]); -} - -__global__ void -bin_hits(int nchannels, unsigned int *channel_q, float *channel_time, - unsigned int *hitcount, int tbins, float tmin, float tmax, int qbins, - float qmin, float qmax, unsigned int *pdf) -{ - int id = threadIdx.x + blockDim.x * blockIdx.x; - - if (id >= nchannels) - return; - - unsigned int q = channel_q[id]; - float t = channel_time[id]; - - if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) { - hitcount[id] += 1; - - int tbin = (t - tmin) / (tmax - tmin) * tbins; - int qbin = (q - qmin) / (qmax - qmin) * qbins; - - // row major order (channel, t, q) - int bin = id * (tbins * qbins) + tbin * qbins + qbin; - pdf[bin] += 1; - } -} - -__global__ void -accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit, - float *event_time, float *event_charge, float *mc_time, - unsigned int *mc_charge, // quirk of DAQ! - unsigned int *hitcount, unsigned int *bincount, - float min_twidth, float tmin, float tmax, - float min_qwidth, float qmin, float qmax, - float *nearest_mc, int min_bin_content) -{ - int id = threadIdx.x + blockDim.x * blockIdx.x; - - if (id >= nchannels) - return; - - // Was this channel hit in the Monte Carlo? - float channel_mc_time = mc_time[id]; - if (channel_mc_time >= 1e8) - return; // Nothing else to do - - // Is this channel inside the range of the PDF? - float distance; - int channel_bincount = bincount[id]; - if (time_only) { - if (channel_mc_time < tmin || channel_mc_time > tmax) - return; // Nothing else to do - - // This MC information is contained in the PDF - hitcount[id] += 1; - - // Was this channel hit in the event-of-interest? - int channel_event_hit = event_hit[id]; - if (!channel_event_hit) - return; // No need to update PDF value for unhit channel - - // Are we inside the minimum size bin? - float channel_event_time = event_time[id]; - distance = fabsf(channel_mc_time - channel_event_time); - if (distance < min_twidth/2.0) - bincount[id] = channel_bincount + 1; - - } - else { // time and charge PDF - float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer - - if (channel_mc_time < tmin || channel_mc_time > tmax || - channel_mc_charge < qmin || channel_mc_charge > qmax) - return; // Nothing else to do - - // This MC information is contained in the PDF - hitcount[id] += 1; - - // Was this channel hit in the event-of-interest? - int channel_event_hit = event_hit[id]; - if (!channel_event_hit) - return; // No need to update PDF value for unhit channel - - // Are we inside the minimum size bin? - float channel_event_time = event_time[id]; - float channel_event_charge = event_charge[id]; - float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0; - float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0; - distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance); - - if (distance < 1.0f) - bincount[id] = channel_bincount + 1; - } - - // Do we need to keep updating the nearest_mc list? - if (channel_bincount + 1 >= min_bin_content) - return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value - - // insertion sort the distance into the array of nearest MC points - int offset = min_bin_content * id; - int entry = min_bin_content - 1; - - // If last entry less than new entry, nothing to update - if (distance > nearest_mc[offset + entry]) - return; - - // Find where to insert the new entry while sliding the rest - // to the right - entry--; - while (entry >= 0) { - if (nearest_mc[offset+entry] >= distance) - nearest_mc[offset+entry+1] = nearest_mc[offset+entry]; - else - break; - entry--; - } - - nearest_mc[offset+entry+1] = distance; -} - -} // extern "C" diff --git a/src/geometry.h b/src/geometry.h deleted file mode 100644 index 2b5eacb..0000000 --- a/src/geometry.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef __GEOMETRY_H__ -#define __GEOMETRY_H__ - -struct Material -{ - float *refractive_index; - float *absorption_length; - float *scattering_length; - unsigned int n; - float step; - float wavelength0; -}; - -struct Surface -{ - float *detect; - float *absorb; - float *reflect_diffuse; - float *reflect_specular; - unsigned int n; - float step; - float wavelength0; -}; - -struct Triangle -{ - float3 v0, v1, v2; -}; - -struct Geometry -{ - float3 *vertices; - uint3 *triangles; - unsigned int *material_codes; - unsigned int *colors; - float3 *lower_bounds; - float3 *upper_bounds; - unsigned int *node_map; - unsigned int *node_map_end; - Material **materials; - Surface **surfaces; - unsigned int start_node; - unsigned int first_node; -}; - -__device__ Triangle -get_triangle(Geometry *geometry, const unsigned int &i) -{ - uint3 triangle_data = geometry->triangles[i]; - - Triangle triangle; - triangle.v0 = geometry->vertices[triangle_data.x]; - triangle.v1 = geometry->vertices[triangle_data.y]; - triangle.v2 = geometry->vertices[triangle_data.z]; - - return triangle; -} - -template -__device__ float -interp_property(T *m, const float &x, const float *fp) -{ - if (x < m->wavelength0) - return fp[0]; - - if (x > (m->wavelength0 + (m->n-1)*m->step)) - return fp[m->n-1]; - - int jl = (x-m->wavelength0)/m->step; - - return fp[jl] + (x-(m->wavelength0 + jl*m->step))*(fp[jl+1]-fp[jl])/m->step; -} - -#endif diff --git a/src/hybrid_render.cu b/src/hybrid_render.cu deleted file mode 100644 index 29edefa..0000000 --- a/src/hybrid_render.cu +++ /dev/null @@ -1,202 +0,0 @@ -//-*-c-*- -#include -#include - -#include "linalg.h" -#include "matrix.h" -#include "rotate.h" -#include "mesh.h" -#include "geometry.h" -#include "photon.h" - -__device__ void -fAtomicAdd(float *addr, float data) -{ - while (data) - data = atomicExch(addr, data+atomicExch(addr, 0.0f)); -} - -__device__ void -to_diffuse(Photon &p, State &s, Geometry *g, curandState &rng, int max_steps) -{ - int steps = 0; - while (steps < max_steps) { - steps++; - - int command; - - fill_state(s, p, g); - - if (p.last_hit_triangle == -1) - break; - - command = propagate_to_boundary(p, s, rng); - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - - if (s.surface_index != -1) { - command = propagate_at_surface(p, s, rng, g); - - if (p.history & REFLECT_DIFFUSE) - break; - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - } - - propagate_at_boundary(p, s, rng); - - } // while (steps < max_steps) - -} // to_diffuse - -extern "C" -{ - -__global__ void -update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, - curandState *rng_states, float wavelength, float3 xyz, - float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps, - Geometry *g) -{ - int kernel_id = blockIdx.x*blockDim.x + threadIdx.x; - int id = kernel_id + offset; - - if (kernel_id >= nthreads || id >= total_threads) - return; - - curandState rng = rng_states[kernel_id]; - - Triangle t = get_triangle(g, id); - - float a = curand_uniform(&rng); - float b = uniform(&rng, 0.0f, (1.0f - a)); - float c = 1.0f - a - b; - - float3 direction = a*t.v0 + b*t.v1 + c*t.v2 - position; - direction /= norm(direction); - - float distance; - int triangle_index = intersect_mesh(position, direction, g, distance); - - if (triangle_index != id) { - rng_states[kernel_id] = rng; - return; - } - - float3 v01 = t.v1 - t.v0; - float3 v12 = t.v2 - t.v1; - - float3 surface_normal = normalize(cross(v01,v12)); - - float cos_theta = dot(surface_normal,-direction); - - if (cos_theta < 0.0f) - cos_theta = dot(-surface_normal,-direction); - - Photon p; - p.position = position; - p.direction = direction; - p.wavelength = wavelength; - p.polarization = uniform_sphere(&rng); - p.last_hit_triangle = -1; - p.time = 0; - p.history = 0; - - State s; - to_diffuse(p, s, g, rng, max_steps); - - if (p.history & REFLECT_DIFFUSE) { - if (s.inside_to_outside) { - fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x); - fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y); - fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z); - } - else { - fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x); - fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y); - fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z); - } - } - - rng_states[kernel_id] = rng; - -} // update_xyz_lookup - -__global__ void -update_xyz_image(int nthreads, curandState *rng_states, float3 *positions, - float3 *directions, float wavelength, float3 xyz, - float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image, - int nlookup_calls, int max_steps, Geometry *g) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - curandState rng = rng_states[id]; - - Photon p; - p.position = positions[id]; - p.direction = directions[id]; - p.direction /= norm(p.direction); - p.wavelength = wavelength; - p.polarization = uniform_sphere(&rng); - p.last_hit_triangle = -1; - p.time = 0; - p.history = 0; - - State s; - to_diffuse(p, s, g, rng, max_steps); - - if (p.history & REFLECT_DIFFUSE) { - if (s.inside_to_outside) - image[id] += xyz*xyz_lookup1[p.last_hit_triangle]/nlookup_calls; - else - image[id] += xyz*xyz_lookup2[p.last_hit_triangle]/nlookup_calls; - } - - rng_states[id] = rng; - -} // update_xyz_image - -__global__ void -process_image(int nthreads, float3 *image, unsigned int *pixels, int nimages) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - float3 rgb = image[id]/nimages; - - if (rgb.x < 0.0f) - rgb.x = 0.0f; - if (rgb.y < 0.0f) - rgb.y = 0.0f; - if (rgb.z < 0.0f) - rgb.z = 0.0f; - - if (rgb.x > 1.0f) - rgb.x = 1.0f; - if (rgb.y > 1.0f) - rgb.y = 1.0f; - if (rgb.z > 1.0f) - rgb.z = 1.0f; - - unsigned int r = floorf(rgb.x*255.0f); - unsigned int g = floorf(rgb.y*255.0f); - unsigned int b = floorf(rgb.z*255.0f); - - pixels[id] = 255 << 24 | r << 16 | g << 8 | b; - -} // process_image - -} // extern "c" diff --git a/src/intersect.h b/src/intersect.h deleted file mode 100644 index 978bde8..0000000 --- a/src/intersect.h +++ /dev/null @@ -1,141 +0,0 @@ -//-*-c-*- - -#ifndef __INTERSECT_H__ -#define __INTERSECT_H__ - -#include "linalg.h" -#include "matrix.h" -#include "geometry.h" - -#define EPSILON 0.0f - -/* Tests the intersection between a ray and a triangle. - If the ray intersects the triangle, set `distance` to the distance from - `origin` to the intersection and return true, else return false. - `direction` must be normalized to one. */ -__device__ bool -intersect_triangle(const float3 &origin, const float3 &direction, - const Triangle &triangle, float &distance) -{ - float3 m1 = triangle.v1-triangle.v0; - float3 m2 = triangle.v2-triangle.v0; - float3 m3 = -direction; - - Matrix m = make_matrix(m1, m2, m3); - - float determinant = det(m); - - if (determinant == 0.0f) - return false; - - float3 b = origin-triangle.v0; - - float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + - (m.a02*m.a21 - m.a01*m.a22)*b.y + - (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant; - - if (u1 < -EPSILON || u1 > 1.0f) - return false; - - float u2 = ((m.a12*m.a20 - m.a10*m.a22)*b.x + - (m.a00*m.a22 - m.a02*m.a20)*b.y + - (m.a02*m.a10 - m.a00*m.a12)*b.z)/determinant; - - if (u2 < -EPSILON || u2 > 1.0f) - return false; - - float u3 = ((m.a10*m.a21 - m.a11*m.a20)*b.x + - (m.a01*m.a20 - m.a00*m.a21)*b.y + - (m.a00*m.a11 - m.a01*m.a10)*b.z)/determinant; - - if (u3 <= 0.0f || (1.0f-u1-u2) < -EPSILON) - return false; - - distance = u3; - - return true; -} - -/* Tests the intersection between a ray and an axis-aligned box defined by - an upper and lower bound. If the ray intersects the box, set - `distance_to_box` to the distance from `origin` to the intersection and - return true, else return false. `direction` must be normalized to one. - - Source: "An Efficient and Robust Ray-Box Intersection Algorithm." - by Williams, et. al. */ -__device__ bool -intersect_box(const float3 &origin, const float3 &direction, - const float3 &lower_bound, const float3 &upper_bound, - float& distance_to_box) -{ - float kmin, kmax, kymin, kymax, kzmin, kzmax; - - float divx = 1.0f/direction.x; - if (divx >= 0.0f) - { - kmin = (lower_bound.x - origin.x)*divx; - kmax = (upper_bound.x - origin.x)*divx; - } - else - { - kmin = (upper_bound.x - origin.x)*divx; - kmax = (lower_bound.x - origin.x)*divx; - } - - if (kmax < 0.0f) - return false; - - float divy = 1.0f/direction.y; - if (divy >= 0.0f) - { - kymin = (lower_bound.y - origin.y)*divy; - kymax = (upper_bound.y - origin.y)*divy; - } - else - { - kymin = (upper_bound.y - origin.y)*divy; - kymax = (lower_bound.y - origin.y)*divy; - } - - if (kymax < 0.0f) - return false; - - if (kymin > kmin) - kmin = kymin; - - if (kymax < kmax) - kmax = kymax; - - if (kmin > kmax) - return false; - - float divz = 1.0f/direction.z; - if (divz >= 0.0f) - { - kzmin = (lower_bound.z - origin.z)*divz; - kzmax = (upper_bound.z - origin.z)*divz; - } - else - { - kzmin = (upper_bound.z - origin.z)*divz; - kzmax = (lower_bound.z - origin.z)*divz; - } - - if (kzmax < 0.0f) - return false; - - if (kzmin > kmin) - kmin = kzmin; - - if (kzmax < kmax) - kmax = kzmax; - - if (kmin > kmax) - return false; - - distance_to_box = kmin; - - return true; -} - -#endif diff --git a/src/linalg.h b/src/linalg.h deleted file mode 100644 index 35b2423..0000000 --- a/src/linalg.h +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef __LINALG_H__ -#define __LINALG_H__ - -__device__ float3 -operator- (const float3 &a) -{ - return make_float3(-a.x, -a.y, -a.z); -} - -__device__ float3 -operator* (const float3 &a, const float3 &b) -{ - return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); -} - -__device__ float3 -operator/ (const float3 &a, const float3 &b) -{ - return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); -} - -__device__ void -operator*= (float3 &a, const float3 &b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; -} - -__device__ void -operator/= (float3 &a, const float3 &b) -{ - a.x /= b.x; - a.y /= b.y; - a.z /= b.z; -} - -__device__ float3 -operator+ (const float3 &a, const float3 &b) -{ - return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); -} - -__device__ void -operator+= (float3 &a, const float3 &b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; -} - -__device__ float3 -operator- (const float3 &a, const float3 &b) -{ - return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); -} - -__device__ void -operator-= (float3 &a, const float3 &b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; -} - -__device__ float3 -operator+ (const float3 &a, const float &c) -{ - return make_float3(a.x+c, a.y+c, a.z+c); -} - -__device__ void -operator+= (float3 &a, const float &c) -{ - a.x += c; - a.y += c; - a.z += c; -} - -__device__ float3 -operator+ (const float &c, const float3 &a) -{ - return make_float3(c+a.x, c+a.y, c+a.z); -} - -__device__ float3 -operator- (const float3 &a, const float &c) -{ - return make_float3(a.x-c, a.y-c, a.z-c); -} - -__device__ void -operator-= (float3 &a, const float &c) -{ - a.x -= c; - a.y -= c; - a.z -= c; -} - -__device__ float3 -operator- (const float &c, const float3& a) -{ - return make_float3(c-a.x, c-a.y, c-a.z); -} - -__device__ float3 -operator* (const float3 &a, const float &c) -{ - return make_float3(a.x*c, a.y*c, a.z*c); -} - -__device__ void -operator*= (float3 &a, const float &c) -{ - a.x *= c; - a.y *= c; - a.z *= c; -} - -__device__ float3 -operator* (const float &c, const float3& a) -{ - return make_float3(c*a.x, c*a.y, c*a.z); -} - -__device__ float3 -operator/ (const float3 &a, const float &c) -{ - return make_float3(a.x/c, a.y/c, a.z/c); -} - -__device__ void -operator/= (float3 &a, const float &c) -{ - a.x /= c; - a.y /= c; - a.z /= c; -} - -__device__ float3 -operator/ (const float &c, const float3 &a) -{ - return make_float3(c/a.x, c/a.y, c/a.z); -} - -__device__ float -dot(const float3 &a, const float3 &b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z; -} - -__device__ float3 -cross(const float3 &a, const float3 &b) -{ - return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); -} - -__device__ float -norm(const float3 &a) -{ - return sqrtf(dot(a,a)); -} - -__device__ float3 -normalize(const float3 &a) -{ - return a/norm(a); -} - -#endif diff --git a/src/matrix.h b/src/matrix.h deleted file mode 100644 index 0a66e58..0000000 --- a/src/matrix.h +++ /dev/null @@ -1,249 +0,0 @@ -#ifndef __MATRIX_H__ -#define __MATRIX_H__ - -struct Matrix -{ - float a00, a01, a02, a10, a11, a12, a20, a21, a22; -}; - -__device__ Matrix -make_matrix(float a00, float a01, float a02, - float a10, float a11, float a12, - float a20, float a21, float a22) -{ - Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22}; - return m; -} - -__device__ Matrix -make_matrix(const float3 &u1, const float3 &u2, const float3 &u3) -{ - Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z}; - return m; -} - -__device__ Matrix -operator- (const Matrix &m) -{ - return make_matrix(-m.a00, -m.a01, -m.a02, - -m.a10, -m.a11, -m.a12, - -m.a20, -m.a21, -m.a22); -} - -__device__ float3 -operator* (const Matrix &m, const float3 &a) -{ - return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z, - m.a10*a.x + m.a11*a.y + m.a12*a.z, - m.a20*a.x + m.a21*a.y + m.a22*a.z); -} - -__device__ Matrix -operator+ (const Matrix &m, const Matrix &n) -{ - return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02, - m.a10+n.a10, m.a11+n.a11, m.a12+n.a12, - m.a20+n.a20, m.a21+n.a21, m.a22+n.a22); -} - -__device__ void -operator+= (Matrix &m, const Matrix &n) -{ - m.a00 += n.a00; - m.a01 += n.a01; - m.a02 += n.a02; - m.a10 += n.a10; - m.a11 += n.a11; - m.a12 += n.a12; - m.a20 += n.a20; - m.a21 += n.a21; - m.a22 += n.a22; -} - -__device__ Matrix -operator- (const Matrix &m, const Matrix &n) -{ - return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02, - m.a10-n.a10, m.a11-n.a11, m.a12-n.a12, - m.a20-n.a20, m.a21-n.a21, m.a22-n.a22); -} - -__device__ void -operator-= (Matrix &m, const Matrix& n) -{ - m.a00 -= n.a00; - m.a01 -= n.a01; - m.a02 -= n.a02; - m.a10 -= n.a10; - m.a11 -= n.a11; - m.a12 -= n.a12; - m.a20 -= n.a20; - m.a21 -= n.a21; - m.a22 -= n.a22; -} - -__device__ Matrix -operator* (const Matrix &m, const Matrix &n) -{ - return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20, - m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21, - m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22, - m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20, - m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21, - m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22, - m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20, - m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21, - m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22); -} - -__device__ Matrix -operator+ (const Matrix &m, const float &c) -{ - return make_matrix(m.a00+c, m.a01+c, m.a02+c, - m.a10+c, m.a11+c, m.a12+c, - m.a20+c, m.a21+c, m.a22+c); -} - -__device__ void -operator+= (Matrix &m, const float &c) -{ - m.a00 += c; - m.a01 += c; - m.a02 += c; - m.a10 += c; - m.a11 += c; - m.a12 += c; - m.a20 += c; - m.a21 += c; - m.a22 += c; -} - -__device__ Matrix -operator+ (const float &c, const Matrix &m) -{ - return make_matrix(c+m.a00, c+m.a01, c+m.a02, - c+m.a10, c+m.a11, c+m.a12, - c+m.a20, c+m.a21, c+m.a22); -} - -__device__ Matrix -operator- (const Matrix &m, const float &c) -{ - return make_matrix(m.a00-c, m.a01-c, m.a02-c, - m.a10-c, m.a11-c, m.a12-c, - m.a20-c, m.a21-c, m.a22-c); -} - -__device__ void -operator-= (Matrix &m, const float &c) -{ - m.a00 -= c; - m.a01 -= c; - m.a02 -= c; - m.a10 -= c; - m.a11 -= c; - m.a12 -= c; - m.a20 -= c; - m.a21 -= c; - m.a22 -= c; -} - -__device__ Matrix -operator- (const float &c, const Matrix &m) -{ - return make_matrix(c-m.a00, c-m.a01, c-m.a02, - c-m.a10, c-m.a11, c-m.a12, - c-m.a20, c-m.a21, c-m.a22); -} - -__device__ Matrix -operator* (const Matrix &m, const float &c) -{ - return make_matrix(m.a00*c, m.a01*c, m.a02*c, - m.a10*c, m.a11*c, m.a12*c, - m.a20*c, m.a21*c, m.a22*c); -} - -__device__ void -operator*= (Matrix &m, const float &c) -{ - m.a00 *= c; - m.a01 *= c; - m.a02 *= c; - m.a10 *= c; - m.a11 *= c; - m.a12 *= c; - m.a20 *= c; - m.a21 *= c; - m.a22 *= c; -} - -__device__ Matrix -operator* (const float &c, const Matrix &m) -{ - return make_matrix(c*m.a00, c*m.a01, c*m.a02, - c*m.a10, c*m.a11, c*m.a12, - c*m.a20, c*m.a21, c*m.a22); -} - -__device__ Matrix -operator/ (const Matrix &m, const float &c) -{ - return make_matrix(m.a00/c, m.a01/c, m.a02/c, - m.a10/c, m.a11/c, m.a12/c, - m.a20/c, m.a21/c, m.a22/c); -} - -__device__ void -operator/= (Matrix &m, const float &c) -{ - m.a00 /= c; - m.a01 /= c; - m.a02 /= c; - m.a10 /= c; - m.a11 /= c; - m.a12 /= c; - m.a20 /= c; - m.a21 /= c; - m.a22 /= c; -} - -__device__ Matrix -operator/ (const float &c, const Matrix &m) -{ - return make_matrix(c/m.a00, c/m.a01, c/m.a02, - c/m.a10, c/m.a11, c/m.a12, - c/m.a20, c/m.a21, c/m.a22); -} - -__device__ float -det(const Matrix &m) -{ - return m.a00*(m.a11*m.a22 - m.a12*m.a21) - - m.a10*(m.a01*m.a22 - m.a02*m.a21) + - m.a20*(m.a01*m.a12 - m.a02*m.a11); -} - -__device__ Matrix -inv(const Matrix &m) -{ - return make_matrix(m.a11*m.a22 - m.a12*m.a21, - m.a02*m.a21 - m.a01*m.a22, - m.a01*m.a12 - m.a02*m.a11, - m.a12*m.a20 - m.a10*m.a22, - m.a00*m.a22 - m.a02*m.a20, - m.a02*m.a10 - m.a00*m.a12, - m.a10*m.a21 - m.a11*m.a20, - m.a01*m.a20 - m.a00*m.a21, - m.a00*m.a11 - m.a01*m.a10)/det(m); -} - -__device__ Matrix -outer(const float3 &a, const float3 &b) -{ - return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z, - a.y*b.x, a.y*b.y, a.y*b.z, - a.z*b.x, a.z*b.y, a.z*b.z); -} - -#endif diff --git a/src/mesh.h b/src/mesh.h deleted file mode 100644 index d60d801..0000000 --- a/src/mesh.h +++ /dev/null @@ -1,175 +0,0 @@ -#ifndef __MESH_H__ -#define __MESH_H__ - -#include "intersect.h" -#include "geometry.h" - -#include "stdio.h" - -#define STACK_SIZE 100 - -/* Tests the intersection between a ray and a node in the bounding volume - hierarchy. If the ray intersects the bounding volume and `min_distance` - is less than zero or the distance from `origin` to the intersection is - less than `min_distance`, return true, else return false. */ -__device__ bool -intersect_node(const float3 &origin, const float3 &direction, - Geometry *g, int i, float min_distance=-1.0f) -{ - /* assigning these to local variables is faster for some reason */ - float3 lower_bound = g->lower_bounds[i]; - float3 upper_bound = g->upper_bounds[i]; - - float distance_to_box; - - if (intersect_box(origin, direction, lower_bound, upper_bound, - distance_to_box)) { - if (min_distance < 0.0f) - return true; - - if (distance_to_box > min_distance) - return false; - - return true; - } - else { - return false; - } - -} - -/* Finds the intersection between a ray and `geometry`. If the ray does - intersect the mesh and the index of the intersected triangle is not equal - to `last_hit_triangle`, set `min_distance` to the distance from `origin` to - the intersection and return the index of the triangle which the ray - intersected, else return -1. */ -__device__ int -intersect_mesh(const float3 &origin, const float3& direction, Geometry *g, - float &min_distance, int last_hit_triangle = -1) -{ - int triangle_index = -1; - - float distance; - min_distance = -1.0f; - - if (!intersect_node(origin, direction, g, g->start_node, min_distance)) - return -1; - - unsigned int stack[STACK_SIZE]; - - unsigned int *head = &stack[0]; - unsigned int *node = &stack[1]; - unsigned int *tail = &stack[STACK_SIZE-1]; - *node = g->start_node; - - unsigned int i; - - do - { - unsigned int first_child = g->node_map[*node]; - unsigned int stop = g->node_map_end[*node]; - - while (*node >= g->first_node && stop == first_child+1) { - *node = first_child; - first_child = g->node_map[*node]; - stop = g->node_map_end[*node]; - } - - if (*node >= g->first_node) { - for (i=first_child; i < stop; i++) { - if (intersect_node(origin, direction, g, i, min_distance)) { - *node = i; - node++; - } - } - - node--; - } - else { - // node is a leaf - for (i=first_child; i < stop; i++) { - if (last_hit_triangle == i) - continue; - - Triangle t = get_triangle(g, i); - - if (intersect_triangle(origin, direction, t, distance)) { - if (triangle_index == -1) { - triangle_index = i; - min_distance = distance; - continue; - } - - if (distance < min_distance) { - triangle_index = i; - min_distance = distance; - } - } - } // triangle loop - - node--; - - } // node is a leaf - - if (node > tail) { - printf("warning: intersect_mesh() aborted; node > tail\n"); - break; - } - - } // while loop - while (node != head); - - return triangle_index; -} - -extern "C" -{ - -__global__ void -distance_to_mesh(int nthreads, float3 *_origin, float3 *_direction, - Geometry *g, float *_distance) -{ - __shared__ Geometry sg; - - if (threadIdx.x == 0) - sg = *g; - - __syncthreads(); - - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - g = &sg; - - float3 origin = _origin[id]; - float3 direction = _direction[id]; - direction /= norm(direction); - - float distance; - - int triangle_index = intersect_mesh(origin, direction, g, distance); - - if (triangle_index != -1) - _distance[id] = distance; -} - -__global__ void -color_solids(int first_triangle, int nthreads, int *solid_id_map, - bool *solid_hit, unsigned int *solid_colors, Geometry *g) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - int triangle_id = first_triangle + id; - int solid_id = solid_id_map[triangle_id]; - if (solid_hit[solid_id]) - g->colors[triangle_id] = solid_colors[solid_id]; -} - -} // extern "C" - -#endif diff --git a/src/photon.h b/src/photon.h deleted file mode 100644 index 8b7e588..0000000 --- a/src/photon.h +++ /dev/null @@ -1,325 +0,0 @@ -#ifndef __PHOTON_H__ -#define __PHOTON_H__ - -#include "stdio.h" -#include "linalg.h" -#include "rotate.h" -#include "random.h" -#include "physical_constants.h" -#include "mesh.h" -#include "geometry.h" - -struct Photon -{ - float3 position; - float3 direction; - float3 polarization; - float wavelength; - float time; - - unsigned int history; - - int last_hit_triangle; -}; - -struct State -{ - bool inside_to_outside; - - float3 surface_normal; - - float refractive_index1, refractive_index2; - float absorption_length; - float scattering_length; - - int surface_index; - - float distance_to_boundary; -}; - -enum -{ - NO_HIT = 0x1 << 0, - BULK_ABSORB = 0x1 << 1, - SURFACE_DETECT = 0x1 << 2, - SURFACE_ABSORB = 0x1 << 3, - RAYLEIGH_SCATTER = 0x1 << 4, - REFLECT_DIFFUSE = 0x1 << 5, - REFLECT_SPECULAR = 0x1 << 6, - NAN_ABORT = 0x1 << 31 -}; // processes - -enum {BREAK, CONTINUE, PASS}; // return value from propagate_to_boundary - -__device__ int -convert(int c) -{ - if (c & 0x80) - return (0xFFFFFF00 | c); - else - return c; -} - -__device__ float -get_theta(const float3 &a, const float3 &b) -{ - return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b)))); -} - -__device__ void -fill_state(State &s, Photon &p, Geometry *g) -{ - p.last_hit_triangle = intersect_mesh(p.position, p.direction, g, - s.distance_to_boundary, - p.last_hit_triangle); - - if (p.last_hit_triangle == -1) { - p.history |= NO_HIT; - return; - } - - Triangle t = get_triangle(g, p.last_hit_triangle); - - unsigned int material_code = g->material_codes[p.last_hit_triangle]; - - int inner_material_index = convert(0xFF & (material_code >> 24)); - int outer_material_index = convert(0xFF & (material_code >> 16)); - s.surface_index = convert(0xFF & (material_code >> 8)); - - float3 v01 = t.v1 - t.v0; - float3 v12 = t.v2 - t.v1; - - s.surface_normal = normalize(cross(v01, v12)); - - Material *material1, *material2; - if (dot(s.surface_normal,-p.direction) > 0.0f) { - // outside to inside - material1 = g->materials[outer_material_index]; - material2 = g->materials[inner_material_index]; - - s.inside_to_outside = false; - } - else { - // inside to outside - material1 = g->materials[inner_material_index]; - material2 = g->materials[outer_material_index]; - s.surface_normal = -s.surface_normal; - - s.inside_to_outside = true; - } - - s.refractive_index1 = interp_property(material1, p.wavelength, - material1->refractive_index); - s.refractive_index2 = interp_property(material2, p.wavelength, - material2->refractive_index); - s.absorption_length = interp_property(material1, p.wavelength, - material1->absorption_length); - s.scattering_length = interp_property(material1, p.wavelength, - material1->scattering_length); - -} // fill_state - -__device__ float3 -pick_new_direction(float3 axis, float theta, float phi) -{ - // Taken from SNOMAN rayscatter.for - float cos_theta = cosf(theta); - float sin_theta = sinf(theta); - float cos_phi = cosf(phi); - float sin_phi = sinf(phi); - - float sin_axis_theta = sqrt(1.0f - axis.z*axis.z); - float cos_axis_phi, sin_axis_phi; - - if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) { - cos_axis_phi = 1.0f; - sin_axis_phi = 0.0f; - } - else { - cos_axis_phi = axis.x / sin_axis_theta; - sin_axis_phi = axis.y / sin_axis_theta; - } - - float dirx = cos_theta*axis.x + - sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi); - float diry = cos_theta*axis.y + - sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi); - float dirz = cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta; - - return make_float3(dirx, diry, dirz); -} - -__device__ void -rayleigh_scatter(Photon &p, curandState &rng) -{ - float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f); - if (cos_theta > 1.0f) - cos_theta = 1.0f; - else if (cos_theta < -1.0f) - cos_theta = -1.0f; - - float theta = acosf(cos_theta); - float phi = uniform(&rng, 0.0f, 2.0f * PI); - - p.direction = pick_new_direction(p.polarization, theta, phi); - - if (1.0f - fabsf(cos_theta) < 1e-6f) { - p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi); - } - else { - // linear combination of old polarization and new direction - p.polarization = p.polarization - cos_theta * p.direction; - } - - p.direction /= norm(p.direction); - p.polarization /= norm(p.polarization); -} // scatter - -__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) -{ - float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); - float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); - - if (absorption_distance <= scattering_distance) { - if (absorption_distance <= s.distance_to_boundary) { - p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); - p.position += absorption_distance*p.direction; - p.history |= BULK_ABSORB; - - p.last_hit_triangle = -1; - - return BREAK; - } // photon is absorbed in material1 - } - else { - if (scattering_distance <= s.distance_to_boundary) { - p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1); - p.position += scattering_distance*p.direction; - - rayleigh_scatter(p, rng); - - p.history |= RAYLEIGH_SCATTER; - - p.last_hit_triangle = -1; - - return CONTINUE; - } // photon is scattered in material1 - } // if scattering_distance < absorption_distance - - p.position += s.distance_to_boundary*p.direction; - p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1); - - return PASS; - -} // propagate_to_boundary - -__device__ void -propagate_at_boundary(Photon &p, State &s, curandState &rng) -{ - float incident_angle = get_theta(s.surface_normal,-p.direction); - float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2); - - float3 incident_plane_normal = cross(p.direction, s.surface_normal); - float incident_plane_normal_length = norm(incident_plane_normal); - - // Photons at normal incidence do not have a unique plane of incidence, - // so we have to pick the plane normal to be the polarization vector - // to get the correct logic below - if (incident_plane_normal_length < 1e-6f) - incident_plane_normal = p.polarization; - else - incident_plane_normal /= incident_plane_normal_length; - - float normal_coefficient = dot(p.polarization, incident_plane_normal); - float normal_probability = normal_coefficient*normal_coefficient; - - float reflection_coefficient; - if (curand_uniform(&rng) < normal_probability) { - // photon polarization normal to plane of incidence - reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle); - - if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { - p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); - - p.history |= REFLECT_SPECULAR; - } - else { - p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); - } - - p.polarization = incident_plane_normal; - } - else { - // photon polarization parallel to plane of incidence - reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle); - - if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { - p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); - - p.history |= REFLECT_SPECULAR; - } - else { - p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); - } - - p.polarization = cross(incident_plane_normal, p.direction); - p.polarization /= norm(p.polarization); - } - -} // propagate_at_boundary - -__device__ int -propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry) -{ - Surface *surface = geometry->surfaces[s.surface_index]; - - /* since the surface properties are interpolated linearly, we are - guaranteed that they still sum to 1.0. */ - - float detect = interp_property(surface, p.wavelength, surface->detect); - float absorb = interp_property(surface, p.wavelength, surface->absorb); - float reflect_diffuse = interp_property(surface, p.wavelength, surface->reflect_diffuse); - float reflect_specular = interp_property(surface, p.wavelength, surface->reflect_specular); - - float uniform_sample = curand_uniform(&rng); - - if (uniform_sample < absorb) { - p.history |= SURFACE_ABSORB; - return BREAK; - } - else if (uniform_sample < absorb + detect) { - p.history |= SURFACE_DETECT; - return BREAK; - } - else if (uniform_sample < absorb + detect + reflect_diffuse) { - // diffusely reflect - p.direction = uniform_sphere(&rng); - - if (dot(p.direction, s.surface_normal) < 0.0f) - p.direction = -p.direction; - - // randomize polarization? - p.polarization = cross(uniform_sphere(&rng), p.direction); - p.polarization /= norm(p.polarization); - - p.history |= REFLECT_DIFFUSE; - - return CONTINUE; - } - else { - // specularly reflect - float incident_angle = get_theta(s.surface_normal,-p.direction); - float3 incident_plane_normal = cross(p.direction, s.surface_normal); - incident_plane_normal /= norm(incident_plane_normal); - - p.direction = rotate(s.surface_normal, incident_angle, - incident_plane_normal); - - p.history |= REFLECT_SPECULAR; - - return CONTINUE; - } - -} // propagate_at_surface - -#endif diff --git a/src/physical_constants.h b/src/physical_constants.h deleted file mode 100644 index 2ff87cd..0000000 --- a/src/physical_constants.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __PHYSICAL_CONSTANTS_H__ -#define __PHYSICAL_CONSTANTS_H__ - -#define SPEED_OF_LIGHT 2.99792458e8 -#define PI 3.141592653589793 - -#endif diff --git a/src/propagate.cu b/src/propagate.cu deleted file mode 100644 index fdcf532..0000000 --- a/src/propagate.cu +++ /dev/null @@ -1,147 +0,0 @@ -//-*-c-*- - -#include "linalg.h" -#include "geometry.h" -#include "photon.h" - -#include "stdio.h" - -extern "C" -{ - -__global__ void -photon_duplicate(int first_photon, int nthreads, - float3 *positions, float3 *directions, - float *wavelengths, float3 *polarizations, - float *times, unsigned int *histories, - int *last_hit_triangles, - int copies, int stride) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - int photon_id = first_photon + id; - - Photon p; - p.position = positions[photon_id]; - p.direction = directions[photon_id]; - p.polarization = polarizations[photon_id]; - p.wavelength = wavelengths[photon_id]; - p.time = times[photon_id]; - p.last_hit_triangle = last_hit_triangles[photon_id]; - p.history = histories[photon_id]; - - for (int i=1; i <= copies; i++) { - int target_photon_id = photon_id + stride * i; - - positions[target_photon_id] = p.position; - directions[target_photon_id] = p.direction; - polarizations[target_photon_id] = p.polarization; - wavelengths[target_photon_id] = p.wavelength; - times[target_photon_id] = p.time; - last_hit_triangles[target_photon_id] = p.last_hit_triangle; - histories[target_photon_id] = p.history; - } -} - -__global__ void -propagate(int first_photon, int nthreads, unsigned int *input_queue, - unsigned int *output_queue, curandState *rng_states, - float3 *positions, float3 *directions, - float *wavelengths, float3 *polarizations, - float *times, unsigned int *histories, - int *last_hit_triangles, int max_steps, - Geometry *g) -{ - __shared__ Geometry sg; - - if (threadIdx.x == 0) - sg = *g; - - __syncthreads(); - - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - g = &sg; - - curandState rng = rng_states[id]; - - int photon_id = input_queue[first_photon + id]; - - Photon p; - p.position = positions[photon_id]; - p.direction = directions[photon_id]; - p.direction /= norm(p.direction); - p.polarization = polarizations[photon_id]; - p.polarization /= norm(p.polarization); - p.wavelength = wavelengths[photon_id]; - p.time = times[photon_id]; - p.last_hit_triangle = last_hit_triangles[photon_id]; - p.history = histories[photon_id]; - - if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) - return; - - State s; - - int steps = 0; - while (steps < max_steps) { - steps++; - - int command; - - // check for NaN and fail - if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z)) { - p.history |= NO_HIT | NAN_ABORT; - break; - } - - fill_state(s, p, g); - - if (p.last_hit_triangle == -1) - break; - - command = propagate_to_boundary(p, s, rng); - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - - if (s.surface_index != -1) { - command = propagate_at_surface(p, s, rng, g); - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - } - - propagate_at_boundary(p, s, rng); - - } // while (steps < max_steps) - - rng_states[id] = rng; - positions[photon_id] = p.position; - directions[photon_id] = p.direction; - polarizations[photon_id] = p.polarization; - wavelengths[photon_id] = p.wavelength; - times[photon_id] = p.time; - histories[photon_id] = p.history; - last_hit_triangles[photon_id] = p.last_hit_triangle; - - // Not done, put photon in output queue - if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) { - int out_idx = atomicAdd(output_queue, 1); - output_queue[out_idx] = photon_id; - } -} // propagate - -} // extern "C" diff --git a/src/random.h b/src/random.h deleted file mode 100644 index e9d2e75..0000000 --- a/src/random.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef __RANDOM_H__ -#define __RANDOM_H__ - -#include - -#include "physical_constants.h" - -__device__ float -uniform(curandState *s, const float &low, const float &high) -{ - return low + curand_uniform(s)*(high-low); -} - -__device__ float3 -uniform_sphere(curandState *s) -{ - float theta = uniform(s, 0.0f, 2*PI); - float u = uniform(s, -1.0f, 1.0f); - float c = sqrtf(1.0f-u*u); - - return make_float3(c*cosf(theta), c*sinf(theta), u); -} - -// Draw a random sample given a cumulative distribution function -// Assumptions: ncdf >= 2, cdf_y[0] is 0.0, and cdf_y[ncdf-1] is 1.0 -__device__ float -sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y) -{ - float u = curand_uniform(rng); - - // Find u in cdf_y with binary search - // list must contain at least 2 elements: 0.0 and 1.0 - int lower=0; - int upper=ncdf-1; - while(lower < upper-1) { - int half = (lower+upper) / 2; - if (u < cdf_y[half]) - upper = half; - else - lower = half; - } - - float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); - return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac); -} - -extern "C" -{ - -__global__ void -init_rng(int nthreads, curandState *s, unsigned long long seed, - unsigned long long offset) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - curand_init(seed, id, offset, &s[id]); -} - -__global__ void -fill_uniform(int nthreads, curandState *s, float *a, float low, float high) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] = uniform(&s[id], low, high); - -} - -__global__ void -fill_uniform_sphere(int nthreads, curandState *s, float3 *a) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] = uniform_sphere(&s[id]); -} - -} // extern "c" - -#endif diff --git a/src/render.cu b/src/render.cu deleted file mode 100644 index d4ed443..0000000 --- a/src/render.cu +++ /dev/null @@ -1,171 +0,0 @@ -//-*-c-*- - -#include "linalg.h" -#include "intersect.h" -#include "mesh.h" -#include "sorting.h" -#include "geometry.h" - -#include "stdio.h" - -__device__ float4 -get_color(const float3 &direction, const Triangle &t, unsigned int rgba) -{ - float3 v01 = t.v1 - t.v0; - float3 v12 = t.v2 - t.v1; - - float3 surface_normal = normalize(cross(v01,v12)); - - float cos_theta = dot(surface_normal,-direction); - - if (cos_theta < 0.0f) - cos_theta = -cos_theta; - - unsigned int a0 = 0xff & (rgba >> 24); - unsigned int r0 = 0xff & (rgba >> 16); - unsigned int g0 = 0xff & (rgba >> 8); - unsigned int b0 = 0xff & rgba; - - float alpha = (255 - a0)/255.0f; - - return make_float4(r0*cos_theta, g0*cos_theta, b0*cos_theta, alpha); -} - -extern "C" -{ - -__global__ void -render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g, - unsigned int alpha_depth, unsigned int *pixels, float *_dx, - unsigned int *dxlen, float4 *_color) -{ - __shared__ Geometry sg; - - if (threadIdx.x == 0) - sg = *g; - - __syncthreads(); - - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - g = &sg; - - float3 origin = _origin[id]; - float3 direction = _direction[id]; - unsigned int n = dxlen[id]; - - float distance; - - if (n < 1 && !intersect_node(origin, direction, g, g->start_node)) { - pixels[id] = 0; - return; - } - - unsigned int stack[STACK_SIZE]; - - unsigned int *head = &stack[0]; - unsigned int *node = &stack[1]; - unsigned int *tail = &stack[STACK_SIZE-1]; - *node = g->start_node; - - float *dx = _dx + id*alpha_depth; - float4 *color_a = _color + id*alpha_depth; - - unsigned int i; - - do { - unsigned int first_child = g->node_map[*node]; - unsigned int stop = g->node_map_end[*node]; - - while (*node >= g->first_node && stop == first_child+1) { - *node = first_child; - first_child = g->node_map[*node]; - stop = g->node_map_end[*node]; - } - - if (*node >= g->first_node) { - for (i=first_child; i < stop; i++) { - if (intersect_node(origin, direction, g, i)) { - *node = i; - node++; - } - } - - node--; - } - else { - // node is a leaf - for (i=first_child; i < stop; i++) { - Triangle t = get_triangle(g, i); - - if (intersect_triangle(origin, direction, t, distance)) { - if (n < 1) { - dx[0] = distance; - - unsigned int rgba = g->colors[i]; - float4 color = get_color(direction, t, rgba); - - color_a[0] = color; - } - else { - unsigned long j = searchsorted(n, dx, distance); - - if (j <= alpha_depth-1) { - insert(alpha_depth, dx, j, distance); - - unsigned int rgba = g->colors[i]; - float4 color = get_color(direction, t, rgba); - - insert(alpha_depth, color_a, j, color); - } - } - - if (n < alpha_depth) - n++; - } - - } // triangle loop - - node--; - - } // node is a leaf - - } // while loop - while (node != head); - - if (n < 1) { - pixels[id] = 0; - return; - } - - dxlen[id] = n; - - float scale = 1.0f; - float fr = 0.0f; - float fg = 0.0f; - float fb = 0.0f; - for (i=0; i < n; i++) { - float alpha = color_a[i].w; - - fr += scale*color_a[i].x*alpha; - fg += scale*color_a[i].y*alpha; - fb += scale*color_a[i].z*alpha; - - scale *= (1.0f-alpha); - } - unsigned int a; - if (n < alpha_depth) - a = floorf(255*(1.0f-scale)); - else - a = 255; - unsigned int red = floorf(fr/(1.0f-scale)); - unsigned int green = floorf(fg/(1.0f-scale)); - unsigned int blue = floorf(fb/(1.0f-scale)); - - pixels[id] = a << 24 | red << 16 | green << 8 | blue; -} - -} // extern "C" diff --git a/src/rotate.h b/src/rotate.h deleted file mode 100644 index 15f8037..0000000 --- a/src/rotate.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __ROTATE_H__ -#define __ROTATE_H__ - -#include "linalg.h" -#include "matrix.h" - -__device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1}; - -__device__ Matrix -make_rotation_matrix(float phi, const float3 &n) -{ - float cos_phi = cosf(phi); - float sin_phi = sinf(phi); - - return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) + - sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0); -} - -/* rotate points counterclockwise, when looking towards +infinity, - through an angle `phi` about the axis `n`. */ -__device__ float3 -rotate(const float3 &a, float phi, const float3 &n) -{ - return make_rotation_matrix(phi,n)*a; -} - -#endif diff --git a/src/sorting.h b/src/sorting.h deleted file mode 100644 index 2bf1a5a..0000000 --- a/src/sorting.h +++ /dev/null @@ -1,107 +0,0 @@ -template -__device__ void -swap(T &a, T &b) -{ - T tmp = a; - a = b; - b = tmp; -} - -template -__device__ void -reverse(int n, T *a) -{ - for (int i=0; i < n/2; i++) - swap(a[i],a[n-1-i]); -} - -template -__device__ void -piksrt(int n, T *arr) -{ - int i,j; - T a; - - for (j=1; j < n; j++) { - a = arr[j]; - i = j-1; - while (i >= 0 && arr[i] > a) { - arr[i+1] = arr[i]; - i--; - } - arr[i+1] = a; - } -} - -template -__device__ void -piksrt2(int n, T *arr, U *brr) -{ - int i,j; - T a; - U b; - - for (j=1; j < n; j++) { - a = arr[j]; - b = brr[j]; - i = j-1; - while (i >= 0 && arr[i] > a) { - arr[i+1] = arr[i]; - brr[i+1] = brr[i]; - i--; - } - arr[i+1] = a; - brr[i+1] = b; - } -} - -/* Returns the index in `arr` where `x` should be inserted in order to - maintain order. If `n` equals one, return the index such that, when - `x` is inserted, `arr` will be in ascending order. -*/ -template -__device__ unsigned long -searchsorted(unsigned long n, T *arr, const T &x) -{ - unsigned long ju,jm,jl; - int ascnd; - - jl = 0; - ju = n; - - ascnd = (arr[n-1] >= arr[0]); - - while (ju-jl > 1) { - jm = (ju+jl) >> 1; - - if ((x > arr[jm]) == ascnd) - jl = jm; - else - ju = jm; - } - - if ((x <= arr[0]) == ascnd) - return 0; - else - return ju; -} - -template -__device__ void -insert(unsigned long n, T *arr, unsigned long i, const T &x) -{ - unsigned long j; - for (j=n-1; j > i; j--) - arr[j] = arr[j-1]; - arr[i] = x; -} - -template -__device__ void -add_sorted(unsigned long n, T *arr, const T &x) -{ - unsigned long i = searchsorted(n, arr, x); - - if (i < n) - insert(n, arr, i, x); -} diff --git a/src/tools.cu b/src/tools.cu deleted file mode 100644 index 80d06ec..0000000 --- a/src/tools.cu +++ /dev/null @@ -1,21 +0,0 @@ -//--*-c-*- - -extern "C" -{ - -__global__ void -interleave(int nthreads, unsigned long long *x, unsigned long long *y, - unsigned long long *z, int bits, unsigned long long *dest) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - for (int i=0; i < bits; i++) - dest[id] |= (x[id] & 1 << i) << (2*i) | - (y[id] & 1 << i) << (2*i+1) | - (z[id] & 1 << i) << (2*i+2); -} - -} // extern "C" diff --git a/src/transform.cu b/src/transform.cu deleted file mode 100644 index 1f4405e..0000000 --- a/src/transform.cu +++ /dev/null @@ -1,51 +0,0 @@ -//-*-c-*- - -#include "linalg.h" -#include "rotate.h" - -extern "C" -{ - -/* Translate the points `a` by the vector `v` */ -__global__ void -translate(int nthreads, float3 *a, float3 v) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] += v; -} - -/* Rotate the points `a` through an angle `phi` counter-clockwise about the - axis `axis` (when looking towards +infinity). */ -__global__ void -rotate(int nthreads, float3 *a, float phi, float3 axis) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] = rotate(a[id], phi, axis); -} - -/* Rotate the points `a` through an angle `phi` counter-clockwise - (when looking towards +infinity along `axis`) about the axis defined - by the point `point` and the vector `axis` . */ -__global__ void -rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, - float3 point) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] -= point; - a[id] = rotate(a[id], phi, axis); - a[id] += point; -} - -} // extern "c" diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/data/ray_intersection.npz b/test/data/ray_intersection.npz new file mode 100644 index 0000000..fba0e40 Binary files /dev/null and b/test/data/ray_intersection.npz differ diff --git a/test/linalg_test.cu b/test/linalg_test.cu new file mode 100644 index 0000000..4e9c983 --- /dev/null +++ b/test/linalg_test.cu @@ -0,0 +1,128 @@ +//-*-c-*- + +#include "linalg.h" + +extern "C" +{ + +__global__ void float3add(float3 *a, float3 *b, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = a[idx] + b[idx]; +} + +__global__ void float3addequal(float3 *a, float3 *b) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + a[idx] += b[idx]; +} + +__global__ void float3sub(float3 *a, float3 *b, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = a[idx] - b[idx]; +} + +__global__ void float3subequal(float3 *a, float3 *b) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + a[idx] -= b[idx]; +} + +__global__ void float3addfloat(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = a[idx] + c; +} + +__global__ void float3addfloatequal(float3 *a, float c) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + a[idx] += c; +} + +__global__ void floataddfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c + a[idx]; +} + +__global__ void float3subfloat(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = a[idx] - c; +} + +__global__ void float3subfloatequal(float3 *a, float c) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + a[idx] -= c; +} + +__global__ void floatsubfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c - a[idx]; +} + +__global__ void float3mulfloat(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = a[idx]*c; +} + +__global__ void float3mulfloatequal(float3 *a, float c) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + a[idx] *= c; +} + +__global__ void floatmulfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c*a[idx]; +} + +__global__ void float3divfloat(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = a[idx]/c; +} + +__global__ void float3divfloatequal(float3 *a, float c) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + a[idx] /= c; +} + +__global__ void floatdivfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c/a[idx]; +} + +__global__ void dot(float3 *a, float3 *b, float *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = dot(a[idx],b[idx]); +} + +__global__ void cross(float3 *a, float3 *b, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = cross(a[idx],b[idx]); +} + +__global__ void norm(float3 *a, float *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = norm(a[idx]); +} + +__global__ void minusfloat3(float3 *a, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = -a[idx]; +} + +} // extern "c" diff --git a/test/linalg_test.py b/test/linalg_test.py new file mode 100644 index 0000000..31688d9 --- /dev/null +++ b/test/linalg_test.py @@ -0,0 +1,214 @@ +import os +import numpy as np +from pycuda import autoinit +from pycuda.compiler import SourceModule +import pycuda.driver as cuda +from pycuda import gpuarray + +float3 = gpuarray.vec.float3 + +print 'device %s' % autoinit.device.name() + +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' + +source = open(current_directory + '/linalg_test.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) + +float3add = mod.get_function('float3add') +float3addequal = mod.get_function('float3addequal') +float3sub = mod.get_function('float3sub') +float3subequal = mod.get_function('float3subequal') +float3addfloat = mod.get_function('float3addfloat') +float3addfloatequal = mod.get_function('float3addfloatequal') +floataddfloat3 = mod.get_function('floataddfloat3') +float3subfloat = mod.get_function('float3subfloat') +float3subfloatequal = mod.get_function('float3subfloatequal') +floatsubfloat3 = mod.get_function('floatsubfloat3') +float3mulfloat = mod.get_function('float3mulfloat') +float3mulfloatequal = mod.get_function('float3mulfloatequal') +floatmulfloat3 = mod.get_function('floatmulfloat3') +float3divfloat = mod.get_function('float3divfloat') +float3divfloatequal = mod.get_function('float3divfloatequal') +floatdivfloat3 = mod.get_function('floatdivfloat3') +dot = mod.get_function('dot') +cross = mod.get_function('cross') +norm = mod.get_function('norm') +minusfloat3 = mod.get_function('minusfloat3') + +size = {'block': (256,1,1), 'grid': (1,1)} + +a = np.empty(size['block'][0], dtype=float3) +b = np.empty(size['block'][0], dtype=float3) +c = np.float32(np.random.random_sample()) + +a['x'] = np.random.random_sample(size=a.size) +a['y'] = np.random.random_sample(size=a.size) +a['z'] = np.random.random_sample(size=a.size) + +b['x'] = np.random.random_sample(size=b.size) +b['y'] = np.random.random_sample(size=b.size) +b['z'] = np.random.random_sample(size=b.size) + +def testfloat3add(): + dest = np.empty(a.size, dtype=float3) + float3add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + if not np.allclose(a['x']+b['x'], dest['x']) or \ + not np.allclose(a['y']+b['y'], dest['y']) or \ + not np.allclose(a['z']+b['z'], dest['z']): + assert False + +def testfloat3sub(): + dest = np.empty(a.size, dtype=float3) + float3sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + if not np.allclose(a['x']-b['x'], dest['x']) or \ + not np.allclose(a['y']-b['y'], dest['y']) or \ + not np.allclose(a['z']-b['z'], dest['z']): + assert False + +def testfloat3addequal(): + dest = np.copy(a) + float3addequal(cuda.InOut(dest), cuda.In(b), **size) + if not np.allclose(a['x']+b['x'], dest['x']) or \ + not np.allclose(a['y']+b['y'], dest['y']) or \ + not np.allclose(a['z']+b['z'], dest['z']): + assert False + +def testfloat3subequal(): + dest = np.copy(a) + float3subequal(cuda.InOut(dest), cuda.In(b), **size) + if not np.allclose(a['x']-b['x'], dest['x']) or \ + not np.allclose(a['y']-b['y'], dest['y']) or \ + not np.allclose(a['z']-b['z'], dest['z']): + assert False + +def testfloat3addfloat(): + dest = np.empty(a.size, dtype=float3) + float3addfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']+c, dest['x']) or \ + not np.allclose(a['y']+c, dest['y']) or \ + not np.allclose(a['z']+c, dest['z']): + assert False + +def testfloat3addfloatequal(): + dest = np.copy(a) + float3addfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']+c, dest['x']) or \ + not np.allclose(a['y']+c, dest['y']) or \ + not np.allclose(a['z']+c, dest['z']): + assert False + +def testfloataddfloat3(): + dest = np.empty(a.size, dtype=float3) + floataddfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c+a['x'], dest['x']) or \ + not np.allclose(c+a['y'], dest['y']) or \ + not np.allclose(c+a['z'], dest['z']): + assert False + +def testfloat3subfloat(): + dest = np.empty(a.size, dtype=float3) + float3subfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']-c, dest['x']) or \ + not np.allclose(a['y']-c, dest['y']) or \ + not np.allclose(a['z']-c, dest['z']): + assert False + +def testfloat3subfloatequal(): + dest = np.copy(a) + float3subfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']-c, dest['x']) or \ + not np.allclose(a['y']-c, dest['y']) or \ + not np.allclose(a['z']-c, dest['z']): + assert False + +def testfloatsubfloat3(): + dest = np.empty(a.size, dtype=float3) + floatsubfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c-a['x'], dest['x']) or \ + not np.allclose(c-a['y'], dest['y']) or \ + not np.allclose(c-a['z'], dest['z']): + assert False + +def testfloat3mulfloat(): + dest = np.empty(a.size, dtype=float3) + float3mulfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']*c, dest['x']) or \ + not np.allclose(a['y']*c, dest['y']) or \ + not np.allclose(a['z']*c, dest['z']): + assert False + +def testfloat3mulfloatequal(): + dest = np.copy(a) + float3mulfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']*c, dest['x']) or \ + not np.allclose(a['y']*c, dest['y']) or \ + not np.allclose(a['z']*c, dest['z']): + assert False + +def testfloatmulfloat3(): + dest = np.empty(a.size, dtype=float3) + floatmulfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c*a['x'], dest['x']) or \ + not np.allclose(c*a['y'], dest['y']) or \ + not np.allclose(c*a['z'], dest['z']): + assert False + +def testfloat3divfloat(): + dest = np.empty(a.size, dtype=float3) + float3divfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']/c, dest['x']) or \ + not np.allclose(a['y']/c, dest['y']) or \ + not np.allclose(a['z']/c, dest['z']): + assert False + +def testfloat3divfloatequal(): + dest = np.copy(a) + float3divfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']/c, dest['x']) or \ + not np.allclose(a['y']/c, dest['y']) or \ + not np.allclose(a['z']/c, dest['z']): + assert False + +def testfloatdivfloat3(): + dest = np.empty(a.size, dtype=float3) + floatdivfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c/a['x'], dest['x']) or \ + not np.allclose(c/a['y'], dest['y']) or \ + not np.allclose(c/a['z'], dest['z']): + assert false + +def testdot(): + dest = np.empty(a.size, dtype=np.float32) + dot(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], dest): + assert False + +def testcross(): + dest = np.empty(a.size, dtype=float3) + cross(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + for u, v, wdest in zip(a,b,dest): + w = np.cross((u['x'], u['y'], u['z']),(v['x'],v['y'],v['z'])) + if not np.allclose(wdest['x'], w[0]) or \ + not np.allclose(wdest['y'], w[1]) or \ + not np.allclose(wdest['z'], w[2]): + print w + print wdest + assert False + +def testnorm(): + dest = np.empty(a.size, dtype=np.float32) + norm(cuda.In(a), cuda.Out(dest), **size) + + for i in range(len(dest)): + if not np.allclose(np.linalg.norm((a['x'][i],a['y'][i],a['z'][i])), dest[i]): + assert False + +def testminusfloat3(): + dest = np.empty(a.size, dtype=float3) + minusfloat3(cuda.In(a), cuda.Out(dest), **size) + if not np.allclose(-a['x'], dest['x']) or \ + not np.allclose(-a['y'], dest['y']) or \ + not np.allclose(-a['z'], dest['z']): + assert False diff --git a/test/matrix_test.cu b/test/matrix_test.cu new file mode 100644 index 0000000..d64cb34 --- /dev/null +++ b/test/matrix_test.cu @@ -0,0 +1,152 @@ +//-*-c-*- + +#include "matrix.h" + +__device__ Matrix array2matrix(float *a) +{ + return make_matrix(a[0], a[1], a[2], + a[3], a[4], a[5], + a[6], a[7], a[8]); +} + +__device__ void matrix2array(const Matrix &m, float *a) +{ + a[0] = m.a00; + a[1] = m.a01; + a[2] = m.a02; + a[3] = m.a10; + a[4] = m.a11; + a[5] = m.a12; + a[6] = m.a20; + a[7] = m.a21; + a[8] = m.a22; +} + +extern "C" +{ + +__global__ void det(float *a, float *dest) +{ + Matrix m = array2matrix(a); + dest[0] = det(m); +} + +__global__ void inv(float *a, float *dest) +{ + Matrix m = array2matrix(a); + matrix2array(inv(m), dest); +} + +__global__ void minusmatrix(float *a, float *dest) +{ + matrix2array(-array2matrix(a), dest); +} + +__global__ void matrixadd(float *a, float *b, float *dest) +{ + matrix2array(array2matrix(a)+array2matrix(b), dest); +} + +__global__ void matrixsub(float *a, float *b, float *dest) +{ + matrix2array(array2matrix(a)-array2matrix(b), dest); +} + +__global__ void matrixmul(float *a, float *b, float *dest) +{ + matrix2array(array2matrix(a)*array2matrix(b), dest); +} + +__global__ void multiply(float *a, float3 *x, float3 *dest) +{ + dest[0] = array2matrix(a)*x[0]; +} + +__global__ void matrixaddfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)+c, dest); +} + +__global__ void matrixsubfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)-c, dest); +} + +__global__ void matrixmulfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)*c, dest); +} + +__global__ void matrixdivfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)/c, dest); +} + +__global__ void floataddmatrix(float *a, float c, float *dest) +{ + matrix2array(c+array2matrix(a), dest); +} + +__global__ void floatsubmatrix(float *a, float c, float *dest) +{ + matrix2array(c-array2matrix(a), dest); +} + +__global__ void floatmulmatrix(float *a, float c, float *dest) +{ + matrix2array(c*array2matrix(a), dest); +} + +__global__ void floatdivmatrix(float *a, float c, float *dest) +{ + matrix2array(c/array2matrix(a), dest); +} + +__global__ void matrixaddequals(float *a, float *b) +{ + Matrix m = array2matrix(a); + m += array2matrix(b); + matrix2array(m,a); +} + +__global__ void matrixsubequals(float *a, float *b) +{ + Matrix m = array2matrix(a); + m -= array2matrix(b); + matrix2array(m,a); +} + +__global__ void matrixaddequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m += c; + matrix2array(m,a); +} + +__global__ void matrixsubequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m -= c; + matrix2array(m,a); +} + +__global__ void matrixmulequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m *= c; + matrix2array(m,a); +} + +__global__ void matrixdivequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m /= c; + matrix2array(m,a); +} + +__global__ void outer(float3 a, float3 b, float* dest) +{ + matrix2array(outer(a,b), dest); +} + +} // extern "c" diff --git a/test/matrix_test.py b/test/matrix_test.py new file mode 100644 index 0000000..c843025 --- /dev/null +++ b/test/matrix_test.py @@ -0,0 +1,327 @@ +import os +import numpy as np +from pycuda import autoinit +from pycuda.compiler import SourceModule +import pycuda.driver as cuda +from pycuda import gpuarray + +float3 = gpuarray.vec.float3 + +print 'device %s' % autoinit.device.name() + +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' + +source = open(current_directory + '/matrix_test.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) + +det = mod.get_function('det') +inv = mod.get_function('inv') +matrixadd = mod.get_function('matrixadd') +matrixsub = mod.get_function('matrixsub') +matrixmul = mod.get_function('matrixmul') +multiply = mod.get_function('multiply') +matrixaddfloat = mod.get_function('matrixaddfloat') +matrixsubfloat = mod.get_function('matrixsubfloat') +matrixmulfloat = mod.get_function('matrixmulfloat') +matrixdivfloat = mod.get_function('matrixdivfloat') +floataddmatrix = mod.get_function('floataddmatrix') +floatsubmatrix = mod.get_function('floatsubmatrix') +floatmulmatrix = mod.get_function('floatmulmatrix') +floatdivmatrix = mod.get_function('floatdivmatrix') +matrixaddequals = mod.get_function('matrixaddequals') +matrixsubequals = mod.get_function('matrixsubequals') +matrixaddequalsfloat = mod.get_function('matrixaddequalsfloat') +matrixsubequalsfloat = mod.get_function('matrixsubequalsfloat') +matrixmulequalsfloat = mod.get_function('matrixmulequalsfloat') +matrixdivequalsfloat = mod.get_function('matrixdivequalsfloat') +outer = mod.get_function('outer') +minusmatrix = mod.get_function('minusmatrix') + +size = {'block': (1,1,1), 'grid': (1,1)} + +for i in range(1): + a = np.random.random_sample(size=9).astype(np.float32) + b = np.random.random_sample(size=9).astype(np.float32) + dest = np.empty(1, dtype=np.float32) + c = np.int32(np.random.random_sample()) + + print 'testing det...', + + det(cuda.In(a), cuda.Out(dest), **size) + + if not np.allclose(np.float32(np.linalg.det(a.reshape(3,3))), dest[0]): + print 'fail' + print np.float32(np.linalg.det(a.reshape(3,3))) + print dest[0] + else: + print 'success' + + print 'testing inv...', + + dest = np.empty(9, dtype=np.float32) + + inv(cuda.In(a), cuda.Out(dest), **size) + + if not np.allclose(np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32), dest): + print 'fail' + print np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32) + print dest + else: + print 'success' + + print 'testing matrixadd...', + + matrixadd(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + + if not np.allclose(a+b, dest): + print 'fail' + print a+b + print dest + else: + print 'success' + + print 'testing matrixsub...', + + matrixsub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + + if not np.allclose(a-b, dest): + print 'fail' + print a-b + print dest + else: + print 'sucess' + + print 'testing matrixmul...', + + matrixmul(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + + if not np.allclose(np.dot(a.reshape(3,3),b.reshape(3,3)).flatten(), dest): + print 'fail' + print np.dot(a.reshape(3,3),b.reshape(3,3)).flatten() + print dest + else: + print 'success' + + print 'testing multiply...', + + x_cpu = np.random.random_sample(size=3).astype(np.float32) + x_gpu = np.array((x_cpu[0], x_cpu[1], x_cpu[2]), dtype=float3) + + dest = np.empty(1, dtype=float3) + + multiply(cuda.In(a), cuda.In(x_gpu), cuda.Out(dest), **size) + + m = a.reshape(3,3) + + if not np.allclose(np.dot(x_cpu,m[0]), dest[0]['x']) or \ + not np.allclose(np.dot(x_cpu,m[1]), dest[0]['y']) or \ + not np.allclose(np.dot(x_cpu,m[2]), dest[0]['z']): + print 'fail' + print np.dot(x_cpu,m[0]) + print np.dot(x_cpu,m[1]) + print np.dot(x_cpu,m[2]) + print dest[0]['x'] + print dest[0]['y'] + print dest[0]['z'] + else: + print 'success' + + print 'testing matrixaddfloat...', + + dest = np.empty(9, dtype=np.float32) + + matrixaddfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a+c, dest): + print 'fail' + print a+c + print dest + else: + print 'success' + + print 'testing matrixsubfloat...', + + matrixsubfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a-c, dest): + print 'fail' + print a-c + print dest + else: + print 'success' + + print 'testing matrixmulfloat...', + + matrixmulfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a*c, dest): + print 'fail' + print a-c + print dest + else: + print 'success' + + print 'testing matrixdivfloat...', + + matrixdivfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a/c, dest): + print 'fail' + print a/c + print dest + else: + print 'success' + + print 'testing floataddmatrix...', + + floataddmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c+a, dest): + print 'fail' + print c+a + print dest + else: + print 'success' + + print 'testing floatsubmatrix...', + + floatsubmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c-a, dest): + print 'fail' + print c-a + print dest + else: + print 'success' + + print 'testing floatmulmatrix...', + + floatmulmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c*a, dest): + print 'fail' + print c*a + print dest + else: + print 'success' + + print 'testing floatdivmatrix...', + + floatdivmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c/a, dest): + print 'fail' + print c/a + print dest + else: + print 'success' + + print 'testing matrixaddequals...', + + dest = np.copy(a) + + matrixaddequals(cuda.InOut(dest), cuda.In(b), **size) + + if not np.allclose(a+b, dest): + print 'fail' + print a+b + print dest + else: + print 'success' + + print 'testing matrixsubequals...', + + dest = np.copy(a) + + matrixsubequals(cuda.InOut(dest), cuda.In(b), **size) + + if not np.allclose(a-b, dest): + print 'fail' + print a-b + print dest + else: + print 'success' + + print 'testing matrixaddequalsfloat...', + + dest = np.copy(a) + + matrixaddequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a+c, dest): + print 'fail' + print a+c + print dest + else: + print 'success' + + print 'testing matrixsubequalsfloat...', + + dest = np.copy(a) + + matrixsubequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a-c, dest): + print 'fail' + print a-c + print dest + else: + print 'success' + + print 'testing matrixmulequalsfloat...', + + dest = np.copy(a) + + matrixmulequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a*c, dest): + print 'fail' + print a*c + print dest + else: + print 'success' + + print 'testing matrixdivequalsfloat...', + + dest = np.copy(a) + + matrixdivequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a/c, dest): + print 'fail' + print a*c + print dest + else: + print 'success' + + print 'testing outer...', + + x1_cpu = np.random.random_sample(size=3).astype(np.float32) + x2_cpu = np.random.random_sample(size=3).astype(np.float32) + + x1_gpu = np.array((x1_cpu[0], x1_cpu[1], x1_cpu[2]), dtype=float3) + x2_gpu = np.array((x2_cpu[0], x2_cpu[1], x2_cpu[2]), dtype=float3) + + outer(x1_gpu, x2_gpu, cuda.Out(dest), **size) + + if not np.allclose(np.outer(x1_cpu, x2_cpu).flatten(), dest): + print 'fail' + print np.outer(x1_cpu, x2_cpu).flatten() + print dest + else: + print 'success' + + print 'testing minus matrix...', + + dest = np.copy(a) + + minusmatrix(cuda.In(a), cuda.Out(dest), **size) + + if not np.allclose(-a, dest): + print 'fail' + print -a + print dest + else: + print 'success' diff --git a/test/rotate_test.cu b/test/rotate_test.cu new file mode 100644 index 0000000..6cafc12 --- /dev/null +++ b/test/rotate_test.cu @@ -0,0 +1,14 @@ +//-*-c-*- + +#include "rotate.h" + +extern "C" +{ + +__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = rotate(a[idx], phi[idx], n[idx]); +} + +} // extern "c" diff --git a/test/rotate_test.py b/test/rotate_test.py new file mode 100644 index 0000000..92eff84 --- /dev/null +++ b/test/rotate_test.py @@ -0,0 +1,67 @@ +import os +import numpy as np +from pycuda import autoinit +from pycuda.compiler import SourceModule +import pycuda.driver as cuda +from pycuda import gpuarray +float3 = gpuarray.vec.float3 + +def rotate(x, phi, n): + x = np.asarray(x) + n = np.asarray(n) + + r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \ + np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]]) + + return np.inner(x,r) + +print 'device %s' % autoinit.device.name() + +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' + +source = open(current_directory + '/rotate_test.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) + +rotate_gpu = mod.get_function('rotate') + +size = {'block': (100,1,1), 'grid': (1,1)} + +a = np.empty(size['block'][0], dtype=float3) +n = np.empty(size['block'][0], dtype=float3) +phi = np.random.random_sample(size=a.size).astype(np.float32) + +a['x'] = np.random.random_sample(size=a.size) +a['y'] = np.random.random_sample(size=a.size) +a['z'] = np.random.random_sample(size=a.size) + +n['x'] = np.random.random_sample(size=n.size) +n['y'] = np.random.random_sample(size=n.size) +n['z'] = np.random.random_sample(size=n.size) + +a['x'] = np.ones(a.size) +a['y'] = np.zeros(a.size) +a['z'] = np.zeros(a.size) + +n['x'] = np.zeros(n.size) +n['y'] = np.zeros(n.size) +n['z'] = np.ones(n.size) + +phi = np.array([np.pi/2]*a.size).astype(np.float32) + +def testrotate(): + dest = np.empty(a.size, dtype=float3) + rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size) + for v, theta, w, rdest in zip(a,phi,n,dest): + r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z'])) + if not np.allclose(rdest['x'], r[0]) or \ + not np.allclose(rdest['y'], r[1]) or \ + not np.allclose(rdest['z'], r[2]): + print v + print theta + print w + print r + print rdest + assert False + diff --git a/test/test_fileio.py b/test/test_fileio.py new file mode 100644 index 0000000..3869a9f --- /dev/null +++ b/test/test_fileio.py @@ -0,0 +1,74 @@ +import unittest +from chroma.fileio import root +from chroma import event +import numpy as np + +class TestFileIO(unittest.TestCase): + def test_file_write_and_read(self): + ev = event.Event(1, event.Vertex('e-', pos=(0,0,1), dir=(1,0,0), + ke=15.0, pol=(0,1,0))) + + photons_beg = root.make_photon_with_arrays(1) + photons_beg.pos[0] = (1,2,3) + photons_beg.dir[0] = (4,5,6) + photons_beg.pol[0] = (7,8,9) + photons_beg.wavelengths[0] = 400.0 + photons_beg.t[0] = 100.0 + photons_beg.last_hit_triangles[0] = 5 + photons_beg.flags[0] = 20 + ev.photons_beg = photons_beg + + photons_end = root.make_photon_with_arrays(1) + photons_end.pos[0] = (1,2,3) + photons_end.dir[0] = (4,5,6) + photons_end.pol[0] = (7,8,9) + photons_end.wavelengths[0] = 400.0 + photons_end.t[0] = 100.0 + photons_end.last_hit_triangles[0] = 5 + photons_end.flags[0] = 20 + ev.photons_end = photons_end + + ev.vertices = [ev.primary_vertex] + + channels = event.Channels(hit=np.array([True, False]), + t=np.array([20.0, 1e9], dtype=np.float32), + q=np.array([2.0, 0.0], dtype=np.float32), + flags=np.array([8, 32], dtype=np.uint32)) + ev.channels = channels + + filename = '/tmp/chroma-filewritertest.root' + writer = root.RootWriter(filename) + writer.write_event(ev) + writer.close() + + # Exercise the RootReader methods + reader = root.RootReader(filename) + self.assertEquals(len(reader), 1) + + self.assertRaises(StopIteration, reader.prev) + + reader.next() + + self.assertEqual(reader.index(), 0) + self.assertRaises(StopIteration, reader.next) + + reader.jump_to(0) + + # Enough screwing around, let's get the one event in the file + newev = reader.current() + + # Now check if everything is correct in the event + for attribute in ['id']: + self.assertEqual(getattr(ev, attribute), getattr(newev, attribute), 'compare %s' % attribute) + + for attribute in ['pos', 'dir', 'pol', 'ke', 't0']: + self.assertTrue(np.allclose(getattr(ev.primary_vertex, attribute), getattr(newev.primary_vertex, attribute)), 'compare %s' % attribute) + + for i in range(len(ev.vertices)): + self.assertTrue(np.allclose(getattr(ev.vertices[i], attribute), getattr(newev.vertices[i], attribute)), 'compare %s' % attribute) + + for attribute in ['pos', 'dir', 'pol', 'wavelengths', 't', 'last_hit_triangles', 'flags']: + self.assertTrue(np.allclose(getattr(ev.photons_beg, attribute), + getattr(newev.photons_beg, attribute)), 'compare %s' % attribute) + self.assertTrue(np.allclose(getattr(ev.photons_end, attribute), + getattr(newev.photons_end, attribute)), 'compare %s' % attribute) diff --git a/test/test_generator_photon.py b/test/test_generator_photon.py new file mode 100644 index 0000000..13501fe --- /dev/null +++ b/test/test_generator_photon.py @@ -0,0 +1,21 @@ +import unittest +import itertools + +from chroma import generator +from chroma.generator.vertex import constant_particle_gun +from chroma import optics + +class TestG4ParallelGenerator(unittest.TestCase): + def test_center(self): + '''Generate Cherenkov light at the center of the world volume''' + gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim) + vertex = itertools.islice(constant_particle_gun('e-', (0,0,0), (1,0,0), 100), 10) + for event in gen.generate_events(vertex): + self.assertGreater(len(event.photons_beg.pos), 0) + + def test_off_center(self): + '''Generate Cherenkov light at (1 m, 0 m, 0 m)''' + gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim) + vertex = itertools.islice(constant_particle_gun('e-', (1,0,0), (1,0,0), 100), 10) + for event in gen.generate_events(vertex): + self.assertGreater(len(event.photons_beg.pos), 0) diff --git a/test/test_generator_vertex.py b/test/test_generator_vertex.py new file mode 100644 index 0000000..cec363d --- /dev/null +++ b/test/test_generator_vertex.py @@ -0,0 +1,23 @@ +import unittest +import itertools +import numpy as np +import chroma.generator.vertex + +class TestParticleGun(unittest.TestCase): + def test_constant_particle_gun_center(self): + '''Generate electron vertices at the center of the world volume.''' + vertex = chroma.generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + for ev in itertools.islice(vertex, 100): + self.assertEquals(ev.primary_vertex.particle_name, 'e-') + self.assertTrue(np.allclose(ev.primary_vertex.pos, [0,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.ke, 100)) + + def test_off_center(self): + '''Generate electron vertices at (1,0,0) in the world volume.''' + vertex = chroma.generator.vertex.constant_particle_gun('e-', (1,0,0), (1,0,0), 100) + for ev in itertools.islice(vertex, 100): + self.assertEquals(ev.primary_vertex.particle_name, 'e-') + self.assertTrue(np.allclose(ev.primary_vertex.pos, [1,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.ke, 100)) diff --git a/test/test_pdf.py b/test/test_pdf.py new file mode 100644 index 0000000..571cbd4 --- /dev/null +++ b/test/test_pdf.py @@ -0,0 +1,67 @@ +import unittest +import numpy as np +import itertools + +import chroma.detectors +from chroma.generator.photon import G4ParallelGenerator +from chroma.generator.vertex import constant_particle_gun +from chroma.optics import water_wcsim +from chroma import gpu +from chroma.sim import Simulation + +class TestPDF(unittest.TestCase): + def setUp(self): + self.detector = chroma.detectors.microlbne() + self.detector.build() + self.vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 10) + + def testGPUPDF(self): + '''Create a hit count and (q,t) PDF for 10 MeV events in MicroLBNE''' + + g4generator = G4ParallelGenerator(1, water_wcsim) + + context = gpu.create_cuda_context() + + gpu_geometry = gpu.GPUGeometry(self.detector) + + nthreads_per_block, max_blocks = 64, 1024 + + rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) + + gpu_daq = gpu.GPUDaq(gpu_geometry, max(self.detector.pmtids)) + gpu_pdf = gpu.GPUPDF() + gpu_pdf.setup_pdf(max(self.detector.pmtids), 100, (-0.5, 999.5), 10, (-0.5, 9.5)) + + gpu_pdf.clear_pdf() + + for ev in g4generator.generate_events(itertools.islice(self.vertex_gen, 10)): + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block, + max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, + nthreads_per_block, max_blocks) + gpu_pdf.add_hits_to_pdf(gpu_channels) + + hitcount, pdf = gpu_pdf.get_pdfs() + self.assertTrue( (hitcount > 0).any() ) + self.assertTrue( (pdf > 0).any() ) + + # Consistency checks + for i, nhits in enumerate(hitcount): + self.assertEqual(nhits, pdf[i].sum()) + + context.pop() + + def testSimPDF(self): + sim = Simulation(self.detector) + + vertex_iter = itertools.islice(self.vertex_gen, 10) + + hitcount, pdf = sim.create_pdf(vertex_iter, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) + + self.assertTrue( (hitcount > 0).any() ) + self.assertTrue( (pdf > 0).any() ) + + # Consistency checks + for i, nhits in enumerate(hitcount): + self.assertEqual(nhits, pdf[i].sum()) diff --git a/test/test_propagation.py b/test/test_propagation.py new file mode 100644 index 0000000..43ecb9b --- /dev/null +++ b/test/test_propagation.py @@ -0,0 +1,58 @@ +import unittest +import numpy as np + +from chroma.geometry import Solid, Geometry +from chroma.make import box +from chroma.sim import Simulation +from chroma.optics import vacuum +from chroma.event import Photons + +class TestPropagation(unittest.TestCase): + def testAbort(self): + '''Photons that hit a triangle at normal incidence should not abort. + + Photons that hit a triangle at exactly normal incidence can sometimes + produce a dot product that is outside the range allowed by acos(). + Trigger these with axis aligned photons in a box. + ''' + + # Setup geometry + cube = Geometry(vacuum) + cube.add_solid(Solid(box(100,100,100), vacuum, vacuum)) + cube.pmtids = [0] + cube.build(use_cache=False) + sim = Simulation(cube, geant4_processes=0) + + # Create initial photons + nphotons = 10000 + pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32) + dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32) + pol = np.zeros_like(pos) + phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32) + pol[:,0] = np.cos(phi) + pol[:,1] = np.sin(phi) + t = np.zeros(nphotons, dtype=np.float32) + wavelengths = np.empty(nphotons, np.float32) + wavelengths.fill(400.0) + + photons = Photons(pos=pos, dir=dir, pol=pol, t=t, + wavelengths=wavelengths) + + # First make one step to check for strangeness + photons_end = sim.simulate([photons], keep_photons_end=True, + max_steps=1).next().photons_end + + self.assertFalse(np.isnan(photons_end.pos).any()) + self.assertFalse(np.isnan(photons_end.dir).any()) + self.assertFalse(np.isnan(photons_end.pol).any()) + self.assertFalse(np.isnan(photons_end.t).any()) + self.assertFalse(np.isnan(photons_end.wavelengths).any()) + + # Now let it run the usual ten steps + photons_end = sim.simulate([photons], keep_photons_end=True, + max_steps=10).next().photons_end + aborted = (photons_end.flags & (1 << 31)) > 0 + print 'aborted photons: %1.1f' % \ + (float(np.count_nonzero(aborted)) / nphotons) + self.assertFalse(aborted.any()) + diff --git a/test/test_ray_intersection.py b/test/test_ray_intersection.py new file mode 100644 index 0000000..7d0c53c --- /dev/null +++ b/test/test_ray_intersection.py @@ -0,0 +1,27 @@ +import unittest +import chroma +import numpy as np +import os +from pycuda import gpuarray as ga + +class TestRayIntersection(unittest.TestCase): + def setUp(self): + self.context = chroma.gpu.create_cuda_context() + self.module = chroma.gpu.get_cu_module('mesh.h') + self.gpu_funcs = chroma.gpu.GPUFuncs(self.module) + self.box = chroma.gpu.GPUGeometry(chroma.build(chroma.make.cube())) + + pos, dir = chroma.project.from_film() + self.pos_gpu = ga.to_gpu(chroma.gpu.to_float3(pos)) + self.dir_gpu = ga.to_gpu(chroma.gpu.to_float3(dir)) + + testdir = os.path.dirname(os.path.abspath(chroma.tests.__file__)) + self.dx_standard = np.load(os.path.join(testdir, + 'data/ray_intersection.npz')) + def test_intersection_distance(self): + dx = ga.zeros(self.pos_gpu.size, dtype=np.float32) + self.gpu_funcs.distance_to_mesh(np.int32(self.pos_gpu.size), self.pos_gpu, self.dir_gpu, self.box.gpudata, dx, block=(64,1,1), grid=(self.pos_gpu.size//64+1,1)) + self.assertTrue((dx.get() == self.dx_standard).all()) + + def tearDown(self): + self.context.pop() diff --git a/test/test_rayleigh.py b/test/test_rayleigh.py new file mode 100644 index 0000000..02ccb41 --- /dev/null +++ b/test/test_rayleigh.py @@ -0,0 +1,57 @@ +import unittest +import numpy as np + +from chroma.geometry import Solid, Geometry +from chroma.make import box +from chroma.sim import Simulation +from chroma.optics import water_wcsim +from chroma.event import Photons +import histogram +from histogram.root import rootify +import ROOT +ROOT.gROOT.SetBatch(1) + +class TestRayleigh(unittest.TestCase): + def setUp(self): + self.cube = Geometry(water_wcsim) + self.cube.add_solid(Solid(box(100,100,100), water_wcsim, water_wcsim)) + self.cube.pmtids = [0] + self.cube.build(use_cache=False) + self.sim = Simulation(self.cube, geant4_processes=0) + + nphotons = 100000 + pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32) + dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32) + pol = np.zeros_like(pos) + phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32) + pol[:,0] = np.cos(phi) + pol[:,1] = np.sin(phi) + t = np.zeros(nphotons, dtype=np.float32) + wavelengths = np.empty(nphotons, np.float32) + wavelengths.fill(400.0) + + self.photons = Photons(pos=pos, dir=dir, pol=pol, t=t, wavelengths=wavelengths) + + def testAngularDistributionPolarized(self): + # Fully polarized photons + self.photons.pol[:] = [1.0, 0.0, 0.0] + + photons_end = self.sim.simulate([self.photons], keep_photons_end=True, max_steps=1).next().photons_end + aborted = (photons_end.flags & (1 << 31)) > 0 + self.assertFalse(aborted.any()) + + # Compute the dot product between initial and final dir + rayleigh_scatters = (photons_end.flags & (1 << 4)) > 0 + cos_scatter = (self.photons.dir[rayleigh_scatters] * photons_end.dir[rayleigh_scatters]).sum(axis=1) + theta_scatter = np.arccos(cos_scatter) + h = histogram.Histogram(bins=100, range=(0, np.pi)) + h.fill(theta_scatter) + h = rootify(h) + + # The functional form for polarized light should be + # (1 + \cos^2 \theta)\sin \theta according to GEANT4 physics + # reference manual. + f = ROOT.TF1("pol_func", "[0]*(1+cos(x)**2)*sin(x)", 0, np.pi) + h.Fit(f) + self.assertGreater(f.GetProb(), 1e-3) + diff --git a/test/test_sample_cdf.cu b/test/test_sample_cdf.cu new file mode 100644 index 0000000..1401772 --- /dev/null +++ b/test/test_sample_cdf.cu @@ -0,0 +1,16 @@ +// -*-c++-*- +#include "random.h" + +extern "C" { + +__global__ void test_sample_cdf(int offset, int ncdf, + float *cdf_x, float *cdf_y, float *out) +{ + int id = blockDim.x * blockIdx.x + threadIdx.x; + curandState s; + curand_init(0, id, offset, &s); + + out[id] = sample_cdf(&s, ncdf, cdf_x, cdf_y); +} + +} diff --git a/test/test_sample_cdf.py b/test/test_sample_cdf.py new file mode 100644 index 0000000..2baa2ac --- /dev/null +++ b/test/test_sample_cdf.py @@ -0,0 +1,64 @@ +import pycuda.autoinit +import pycuda.driver as cuda +from pycuda.compiler import SourceModule +from pycuda import gpuarray +import numpy as np +import ROOT +import os + +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' + +source = open(current_directory + '/test_sample_cdf.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) + +test_sample_cdf = mod.get_function('test_sample_cdf') + +def compare_sampling(hist, reps=10): + nbins = hist.GetNbinsX(); + xaxis = hist.GetXaxis() + intg = hist.GetIntegral() + cdf_y = np.empty(nbins+1, dtype=float) + cdf_x = np.empty_like(cdf_y) + + cdf_x[0] = xaxis.GetBinLowEdge(1) + cdf_y[0] = 0.0 + for i in xrange(1,len(cdf_x)): + cdf_y[i] = intg[i] + cdf_x[i] = xaxis.GetBinUpEdge(i) + + cdf_x_gpu = gpuarray.to_gpu(cdf_x.astype(np.float32)) + cdf_y_gpu = gpuarray.to_gpu(cdf_y.astype(np.float32)) + block =(128,1,1) + grid = (128, 1) + out_gpu = gpuarray.empty(shape=int(block[0]*grid[0]), dtype=np.float32) + + out_h = ROOT.TH1D('out_h', '', hist.GetNbinsX(), + xaxis.GetXmin(), + xaxis.GetXmax()) + out_h.SetLineColor(ROOT.kGreen) + + for i in xrange(reps): + test_sample_cdf(np.int32(i), + np.int32(len(cdf_x_gpu)), + cdf_x_gpu, cdf_y_gpu, out_gpu, block=block, grid=grid) + out = out_gpu.get() + for v in out: + out_h.Fill(v) + + prob = out_h.KolmogorovTest(hist) + return prob, out_h + +def test_sampling(): + '''Verify that the CDF-based sampler on the GPU reproduces a binned + Gaussian distribution''' + f = ROOT.TF1('f_gaussian', 'gaus(0)', -5, 5) + f.SetParameters(1.0/np.sqrt(np.pi * 2), 0.0, 1.0) + gaussian = ROOT.TH1D('gaussian', '', 100, -5, 5) + gaussian.Add(f) + + prob, out_h = compare_sampling(gaussian, reps=50) + + assert prob > 0.01 + diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/data/ray_intersection.npz b/tests/data/ray_intersection.npz deleted file mode 100644 index fba0e40..0000000 Binary files a/tests/data/ray_intersection.npz and /dev/null differ diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu deleted file mode 100644 index 4e9c983..0000000 --- a/tests/linalg_test.cu +++ /dev/null @@ -1,128 +0,0 @@ -//-*-c-*- - -#include "linalg.h" - -extern "C" -{ - -__global__ void float3add(float3 *a, float3 *b, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = a[idx] + b[idx]; -} - -__global__ void float3addequal(float3 *a, float3 *b) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - a[idx] += b[idx]; -} - -__global__ void float3sub(float3 *a, float3 *b, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = a[idx] - b[idx]; -} - -__global__ void float3subequal(float3 *a, float3 *b) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - a[idx] -= b[idx]; -} - -__global__ void float3addfloat(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = a[idx] + c; -} - -__global__ void float3addfloatequal(float3 *a, float c) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - a[idx] += c; -} - -__global__ void floataddfloat3(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = c + a[idx]; -} - -__global__ void float3subfloat(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = a[idx] - c; -} - -__global__ void float3subfloatequal(float3 *a, float c) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - a[idx] -= c; -} - -__global__ void floatsubfloat3(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = c - a[idx]; -} - -__global__ void float3mulfloat(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = a[idx]*c; -} - -__global__ void float3mulfloatequal(float3 *a, float c) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - a[idx] *= c; -} - -__global__ void floatmulfloat3(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = c*a[idx]; -} - -__global__ void float3divfloat(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = a[idx]/c; -} - -__global__ void float3divfloatequal(float3 *a, float c) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - a[idx] /= c; -} - -__global__ void floatdivfloat3(float3 *a, float c, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = c/a[idx]; -} - -__global__ void dot(float3 *a, float3 *b, float *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = dot(a[idx],b[idx]); -} - -__global__ void cross(float3 *a, float3 *b, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = cross(a[idx],b[idx]); -} - -__global__ void norm(float3 *a, float *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = norm(a[idx]); -} - -__global__ void minusfloat3(float3 *a, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = -a[idx]; -} - -} // extern "c" diff --git a/tests/linalg_test.py b/tests/linalg_test.py deleted file mode 100644 index 31688d9..0000000 --- a/tests/linalg_test.py +++ /dev/null @@ -1,214 +0,0 @@ -import os -import numpy as np -from pycuda import autoinit -from pycuda.compiler import SourceModule -import pycuda.driver as cuda -from pycuda import gpuarray - -float3 = gpuarray.vec.float3 - -print 'device %s' % autoinit.device.name() - -current_directory = os.path.split(os.path.realpath(__file__))[0] -source_directory = current_directory + '/../src' - -source = open(current_directory + '/linalg_test.cu').read() - -mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) - -float3add = mod.get_function('float3add') -float3addequal = mod.get_function('float3addequal') -float3sub = mod.get_function('float3sub') -float3subequal = mod.get_function('float3subequal') -float3addfloat = mod.get_function('float3addfloat') -float3addfloatequal = mod.get_function('float3addfloatequal') -floataddfloat3 = mod.get_function('floataddfloat3') -float3subfloat = mod.get_function('float3subfloat') -float3subfloatequal = mod.get_function('float3subfloatequal') -floatsubfloat3 = mod.get_function('floatsubfloat3') -float3mulfloat = mod.get_function('float3mulfloat') -float3mulfloatequal = mod.get_function('float3mulfloatequal') -floatmulfloat3 = mod.get_function('floatmulfloat3') -float3divfloat = mod.get_function('float3divfloat') -float3divfloatequal = mod.get_function('float3divfloatequal') -floatdivfloat3 = mod.get_function('floatdivfloat3') -dot = mod.get_function('dot') -cross = mod.get_function('cross') -norm = mod.get_function('norm') -minusfloat3 = mod.get_function('minusfloat3') - -size = {'block': (256,1,1), 'grid': (1,1)} - -a = np.empty(size['block'][0], dtype=float3) -b = np.empty(size['block'][0], dtype=float3) -c = np.float32(np.random.random_sample()) - -a['x'] = np.random.random_sample(size=a.size) -a['y'] = np.random.random_sample(size=a.size) -a['z'] = np.random.random_sample(size=a.size) - -b['x'] = np.random.random_sample(size=b.size) -b['y'] = np.random.random_sample(size=b.size) -b['z'] = np.random.random_sample(size=b.size) - -def testfloat3add(): - dest = np.empty(a.size, dtype=float3) - float3add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - if not np.allclose(a['x']+b['x'], dest['x']) or \ - not np.allclose(a['y']+b['y'], dest['y']) or \ - not np.allclose(a['z']+b['z'], dest['z']): - assert False - -def testfloat3sub(): - dest = np.empty(a.size, dtype=float3) - float3sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - if not np.allclose(a['x']-b['x'], dest['x']) or \ - not np.allclose(a['y']-b['y'], dest['y']) or \ - not np.allclose(a['z']-b['z'], dest['z']): - assert False - -def testfloat3addequal(): - dest = np.copy(a) - float3addequal(cuda.InOut(dest), cuda.In(b), **size) - if not np.allclose(a['x']+b['x'], dest['x']) or \ - not np.allclose(a['y']+b['y'], dest['y']) or \ - not np.allclose(a['z']+b['z'], dest['z']): - assert False - -def testfloat3subequal(): - dest = np.copy(a) - float3subequal(cuda.InOut(dest), cuda.In(b), **size) - if not np.allclose(a['x']-b['x'], dest['x']) or \ - not np.allclose(a['y']-b['y'], dest['y']) or \ - not np.allclose(a['z']-b['z'], dest['z']): - assert False - -def testfloat3addfloat(): - dest = np.empty(a.size, dtype=float3) - float3addfloat(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(a['x']+c, dest['x']) or \ - not np.allclose(a['y']+c, dest['y']) or \ - not np.allclose(a['z']+c, dest['z']): - assert False - -def testfloat3addfloatequal(): - dest = np.copy(a) - float3addfloatequal(cuda.InOut(dest), c, **size) - if not np.allclose(a['x']+c, dest['x']) or \ - not np.allclose(a['y']+c, dest['y']) or \ - not np.allclose(a['z']+c, dest['z']): - assert False - -def testfloataddfloat3(): - dest = np.empty(a.size, dtype=float3) - floataddfloat3(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(c+a['x'], dest['x']) or \ - not np.allclose(c+a['y'], dest['y']) or \ - not np.allclose(c+a['z'], dest['z']): - assert False - -def testfloat3subfloat(): - dest = np.empty(a.size, dtype=float3) - float3subfloat(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(a['x']-c, dest['x']) or \ - not np.allclose(a['y']-c, dest['y']) or \ - not np.allclose(a['z']-c, dest['z']): - assert False - -def testfloat3subfloatequal(): - dest = np.copy(a) - float3subfloatequal(cuda.InOut(dest), c, **size) - if not np.allclose(a['x']-c, dest['x']) or \ - not np.allclose(a['y']-c, dest['y']) or \ - not np.allclose(a['z']-c, dest['z']): - assert False - -def testfloatsubfloat3(): - dest = np.empty(a.size, dtype=float3) - floatsubfloat3(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(c-a['x'], dest['x']) or \ - not np.allclose(c-a['y'], dest['y']) or \ - not np.allclose(c-a['z'], dest['z']): - assert False - -def testfloat3mulfloat(): - dest = np.empty(a.size, dtype=float3) - float3mulfloat(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(a['x']*c, dest['x']) or \ - not np.allclose(a['y']*c, dest['y']) or \ - not np.allclose(a['z']*c, dest['z']): - assert False - -def testfloat3mulfloatequal(): - dest = np.copy(a) - float3mulfloatequal(cuda.InOut(dest), c, **size) - if not np.allclose(a['x']*c, dest['x']) or \ - not np.allclose(a['y']*c, dest['y']) or \ - not np.allclose(a['z']*c, dest['z']): - assert False - -def testfloatmulfloat3(): - dest = np.empty(a.size, dtype=float3) - floatmulfloat3(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(c*a['x'], dest['x']) or \ - not np.allclose(c*a['y'], dest['y']) or \ - not np.allclose(c*a['z'], dest['z']): - assert False - -def testfloat3divfloat(): - dest = np.empty(a.size, dtype=float3) - float3divfloat(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(a['x']/c, dest['x']) or \ - not np.allclose(a['y']/c, dest['y']) or \ - not np.allclose(a['z']/c, dest['z']): - assert False - -def testfloat3divfloatequal(): - dest = np.copy(a) - float3divfloatequal(cuda.InOut(dest), c, **size) - if not np.allclose(a['x']/c, dest['x']) or \ - not np.allclose(a['y']/c, dest['y']) or \ - not np.allclose(a['z']/c, dest['z']): - assert False - -def testfloatdivfloat3(): - dest = np.empty(a.size, dtype=float3) - floatdivfloat3(cuda.In(a), c, cuda.Out(dest), **size) - if not np.allclose(c/a['x'], dest['x']) or \ - not np.allclose(c/a['y'], dest['y']) or \ - not np.allclose(c/a['z'], dest['z']): - assert false - -def testdot(): - dest = np.empty(a.size, dtype=np.float32) - dot(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], dest): - assert False - -def testcross(): - dest = np.empty(a.size, dtype=float3) - cross(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - for u, v, wdest in zip(a,b,dest): - w = np.cross((u['x'], u['y'], u['z']),(v['x'],v['y'],v['z'])) - if not np.allclose(wdest['x'], w[0]) or \ - not np.allclose(wdest['y'], w[1]) or \ - not np.allclose(wdest['z'], w[2]): - print w - print wdest - assert False - -def testnorm(): - dest = np.empty(a.size, dtype=np.float32) - norm(cuda.In(a), cuda.Out(dest), **size) - - for i in range(len(dest)): - if not np.allclose(np.linalg.norm((a['x'][i],a['y'][i],a['z'][i])), dest[i]): - assert False - -def testminusfloat3(): - dest = np.empty(a.size, dtype=float3) - minusfloat3(cuda.In(a), cuda.Out(dest), **size) - if not np.allclose(-a['x'], dest['x']) or \ - not np.allclose(-a['y'], dest['y']) or \ - not np.allclose(-a['z'], dest['z']): - assert False diff --git a/tests/matrix_test.cu b/tests/matrix_test.cu deleted file mode 100644 index d64cb34..0000000 --- a/tests/matrix_test.cu +++ /dev/null @@ -1,152 +0,0 @@ -//-*-c-*- - -#include "matrix.h" - -__device__ Matrix array2matrix(float *a) -{ - return make_matrix(a[0], a[1], a[2], - a[3], a[4], a[5], - a[6], a[7], a[8]); -} - -__device__ void matrix2array(const Matrix &m, float *a) -{ - a[0] = m.a00; - a[1] = m.a01; - a[2] = m.a02; - a[3] = m.a10; - a[4] = m.a11; - a[5] = m.a12; - a[6] = m.a20; - a[7] = m.a21; - a[8] = m.a22; -} - -extern "C" -{ - -__global__ void det(float *a, float *dest) -{ - Matrix m = array2matrix(a); - dest[0] = det(m); -} - -__global__ void inv(float *a, float *dest) -{ - Matrix m = array2matrix(a); - matrix2array(inv(m), dest); -} - -__global__ void minusmatrix(float *a, float *dest) -{ - matrix2array(-array2matrix(a), dest); -} - -__global__ void matrixadd(float *a, float *b, float *dest) -{ - matrix2array(array2matrix(a)+array2matrix(b), dest); -} - -__global__ void matrixsub(float *a, float *b, float *dest) -{ - matrix2array(array2matrix(a)-array2matrix(b), dest); -} - -__global__ void matrixmul(float *a, float *b, float *dest) -{ - matrix2array(array2matrix(a)*array2matrix(b), dest); -} - -__global__ void multiply(float *a, float3 *x, float3 *dest) -{ - dest[0] = array2matrix(a)*x[0]; -} - -__global__ void matrixaddfloat(float *a, float c, float *dest) -{ - matrix2array(array2matrix(a)+c, dest); -} - -__global__ void matrixsubfloat(float *a, float c, float *dest) -{ - matrix2array(array2matrix(a)-c, dest); -} - -__global__ void matrixmulfloat(float *a, float c, float *dest) -{ - matrix2array(array2matrix(a)*c, dest); -} - -__global__ void matrixdivfloat(float *a, float c, float *dest) -{ - matrix2array(array2matrix(a)/c, dest); -} - -__global__ void floataddmatrix(float *a, float c, float *dest) -{ - matrix2array(c+array2matrix(a), dest); -} - -__global__ void floatsubmatrix(float *a, float c, float *dest) -{ - matrix2array(c-array2matrix(a), dest); -} - -__global__ void floatmulmatrix(float *a, float c, float *dest) -{ - matrix2array(c*array2matrix(a), dest); -} - -__global__ void floatdivmatrix(float *a, float c, float *dest) -{ - matrix2array(c/array2matrix(a), dest); -} - -__global__ void matrixaddequals(float *a, float *b) -{ - Matrix m = array2matrix(a); - m += array2matrix(b); - matrix2array(m,a); -} - -__global__ void matrixsubequals(float *a, float *b) -{ - Matrix m = array2matrix(a); - m -= array2matrix(b); - matrix2array(m,a); -} - -__global__ void matrixaddequalsfloat(float *a, float c) -{ - Matrix m = array2matrix(a); - m += c; - matrix2array(m,a); -} - -__global__ void matrixsubequalsfloat(float *a, float c) -{ - Matrix m = array2matrix(a); - m -= c; - matrix2array(m,a); -} - -__global__ void matrixmulequalsfloat(float *a, float c) -{ - Matrix m = array2matrix(a); - m *= c; - matrix2array(m,a); -} - -__global__ void matrixdivequalsfloat(float *a, float c) -{ - Matrix m = array2matrix(a); - m /= c; - matrix2array(m,a); -} - -__global__ void outer(float3 a, float3 b, float* dest) -{ - matrix2array(outer(a,b), dest); -} - -} // extern "c" diff --git a/tests/matrix_test.py b/tests/matrix_test.py deleted file mode 100644 index c843025..0000000 --- a/tests/matrix_test.py +++ /dev/null @@ -1,327 +0,0 @@ -import os -import numpy as np -from pycuda import autoinit -from pycuda.compiler import SourceModule -import pycuda.driver as cuda -from pycuda import gpuarray - -float3 = gpuarray.vec.float3 - -print 'device %s' % autoinit.device.name() - -current_directory = os.path.split(os.path.realpath(__file__))[0] -source_directory = current_directory + '/../src' - -source = open(current_directory + '/matrix_test.cu').read() - -mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) - -det = mod.get_function('det') -inv = mod.get_function('inv') -matrixadd = mod.get_function('matrixadd') -matrixsub = mod.get_function('matrixsub') -matrixmul = mod.get_function('matrixmul') -multiply = mod.get_function('multiply') -matrixaddfloat = mod.get_function('matrixaddfloat') -matrixsubfloat = mod.get_function('matrixsubfloat') -matrixmulfloat = mod.get_function('matrixmulfloat') -matrixdivfloat = mod.get_function('matrixdivfloat') -floataddmatrix = mod.get_function('floataddmatrix') -floatsubmatrix = mod.get_function('floatsubmatrix') -floatmulmatrix = mod.get_function('floatmulmatrix') -floatdivmatrix = mod.get_function('floatdivmatrix') -matrixaddequals = mod.get_function('matrixaddequals') -matrixsubequals = mod.get_function('matrixsubequals') -matrixaddequalsfloat = mod.get_function('matrixaddequalsfloat') -matrixsubequalsfloat = mod.get_function('matrixsubequalsfloat') -matrixmulequalsfloat = mod.get_function('matrixmulequalsfloat') -matrixdivequalsfloat = mod.get_function('matrixdivequalsfloat') -outer = mod.get_function('outer') -minusmatrix = mod.get_function('minusmatrix') - -size = {'block': (1,1,1), 'grid': (1,1)} - -for i in range(1): - a = np.random.random_sample(size=9).astype(np.float32) - b = np.random.random_sample(size=9).astype(np.float32) - dest = np.empty(1, dtype=np.float32) - c = np.int32(np.random.random_sample()) - - print 'testing det...', - - det(cuda.In(a), cuda.Out(dest), **size) - - if not np.allclose(np.float32(np.linalg.det(a.reshape(3,3))), dest[0]): - print 'fail' - print np.float32(np.linalg.det(a.reshape(3,3))) - print dest[0] - else: - print 'success' - - print 'testing inv...', - - dest = np.empty(9, dtype=np.float32) - - inv(cuda.In(a), cuda.Out(dest), **size) - - if not np.allclose(np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32), dest): - print 'fail' - print np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32) - print dest - else: - print 'success' - - print 'testing matrixadd...', - - matrixadd(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - - if not np.allclose(a+b, dest): - print 'fail' - print a+b - print dest - else: - print 'success' - - print 'testing matrixsub...', - - matrixsub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - - if not np.allclose(a-b, dest): - print 'fail' - print a-b - print dest - else: - print 'sucess' - - print 'testing matrixmul...', - - matrixmul(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - - if not np.allclose(np.dot(a.reshape(3,3),b.reshape(3,3)).flatten(), dest): - print 'fail' - print np.dot(a.reshape(3,3),b.reshape(3,3)).flatten() - print dest - else: - print 'success' - - print 'testing multiply...', - - x_cpu = np.random.random_sample(size=3).astype(np.float32) - x_gpu = np.array((x_cpu[0], x_cpu[1], x_cpu[2]), dtype=float3) - - dest = np.empty(1, dtype=float3) - - multiply(cuda.In(a), cuda.In(x_gpu), cuda.Out(dest), **size) - - m = a.reshape(3,3) - - if not np.allclose(np.dot(x_cpu,m[0]), dest[0]['x']) or \ - not np.allclose(np.dot(x_cpu,m[1]), dest[0]['y']) or \ - not np.allclose(np.dot(x_cpu,m[2]), dest[0]['z']): - print 'fail' - print np.dot(x_cpu,m[0]) - print np.dot(x_cpu,m[1]) - print np.dot(x_cpu,m[2]) - print dest[0]['x'] - print dest[0]['y'] - print dest[0]['z'] - else: - print 'success' - - print 'testing matrixaddfloat...', - - dest = np.empty(9, dtype=np.float32) - - matrixaddfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(a+c, dest): - print 'fail' - print a+c - print dest - else: - print 'success' - - print 'testing matrixsubfloat...', - - matrixsubfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(a-c, dest): - print 'fail' - print a-c - print dest - else: - print 'success' - - print 'testing matrixmulfloat...', - - matrixmulfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(a*c, dest): - print 'fail' - print a-c - print dest - else: - print 'success' - - print 'testing matrixdivfloat...', - - matrixdivfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(a/c, dest): - print 'fail' - print a/c - print dest - else: - print 'success' - - print 'testing floataddmatrix...', - - floataddmatrix(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(c+a, dest): - print 'fail' - print c+a - print dest - else: - print 'success' - - print 'testing floatsubmatrix...', - - floatsubmatrix(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(c-a, dest): - print 'fail' - print c-a - print dest - else: - print 'success' - - print 'testing floatmulmatrix...', - - floatmulmatrix(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(c*a, dest): - print 'fail' - print c*a - print dest - else: - print 'success' - - print 'testing floatdivmatrix...', - - floatdivmatrix(cuda.In(a), c, cuda.Out(dest), **size) - - if not np.allclose(c/a, dest): - print 'fail' - print c/a - print dest - else: - print 'success' - - print 'testing matrixaddequals...', - - dest = np.copy(a) - - matrixaddequals(cuda.InOut(dest), cuda.In(b), **size) - - if not np.allclose(a+b, dest): - print 'fail' - print a+b - print dest - else: - print 'success' - - print 'testing matrixsubequals...', - - dest = np.copy(a) - - matrixsubequals(cuda.InOut(dest), cuda.In(b), **size) - - if not np.allclose(a-b, dest): - print 'fail' - print a-b - print dest - else: - print 'success' - - print 'testing matrixaddequalsfloat...', - - dest = np.copy(a) - - matrixaddequalsfloat(cuda.InOut(dest), c, **size) - - if not np.allclose(a+c, dest): - print 'fail' - print a+c - print dest - else: - print 'success' - - print 'testing matrixsubequalsfloat...', - - dest = np.copy(a) - - matrixsubequalsfloat(cuda.InOut(dest), c, **size) - - if not np.allclose(a-c, dest): - print 'fail' - print a-c - print dest - else: - print 'success' - - print 'testing matrixmulequalsfloat...', - - dest = np.copy(a) - - matrixmulequalsfloat(cuda.InOut(dest), c, **size) - - if not np.allclose(a*c, dest): - print 'fail' - print a*c - print dest - else: - print 'success' - - print 'testing matrixdivequalsfloat...', - - dest = np.copy(a) - - matrixdivequalsfloat(cuda.InOut(dest), c, **size) - - if not np.allclose(a/c, dest): - print 'fail' - print a*c - print dest - else: - print 'success' - - print 'testing outer...', - - x1_cpu = np.random.random_sample(size=3).astype(np.float32) - x2_cpu = np.random.random_sample(size=3).astype(np.float32) - - x1_gpu = np.array((x1_cpu[0], x1_cpu[1], x1_cpu[2]), dtype=float3) - x2_gpu = np.array((x2_cpu[0], x2_cpu[1], x2_cpu[2]), dtype=float3) - - outer(x1_gpu, x2_gpu, cuda.Out(dest), **size) - - if not np.allclose(np.outer(x1_cpu, x2_cpu).flatten(), dest): - print 'fail' - print np.outer(x1_cpu, x2_cpu).flatten() - print dest - else: - print 'success' - - print 'testing minus matrix...', - - dest = np.copy(a) - - minusmatrix(cuda.In(a), cuda.Out(dest), **size) - - if not np.allclose(-a, dest): - print 'fail' - print -a - print dest - else: - print 'success' diff --git a/tests/rotate_test.cu b/tests/rotate_test.cu deleted file mode 100644 index 6cafc12..0000000 --- a/tests/rotate_test.cu +++ /dev/null @@ -1,14 +0,0 @@ -//-*-c-*- - -#include "rotate.h" - -extern "C" -{ - -__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = rotate(a[idx], phi[idx], n[idx]); -} - -} // extern "c" diff --git a/tests/rotate_test.py b/tests/rotate_test.py deleted file mode 100644 index 92eff84..0000000 --- a/tests/rotate_test.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -import numpy as np -from pycuda import autoinit -from pycuda.compiler import SourceModule -import pycuda.driver as cuda -from pycuda import gpuarray -float3 = gpuarray.vec.float3 - -def rotate(x, phi, n): - x = np.asarray(x) - n = np.asarray(n) - - r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \ - np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]]) - - return np.inner(x,r) - -print 'device %s' % autoinit.device.name() - -current_directory = os.path.split(os.path.realpath(__file__))[0] -source_directory = current_directory + '/../src' - -source = open(current_directory + '/rotate_test.cu').read() - -mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) - -rotate_gpu = mod.get_function('rotate') - -size = {'block': (100,1,1), 'grid': (1,1)} - -a = np.empty(size['block'][0], dtype=float3) -n = np.empty(size['block'][0], dtype=float3) -phi = np.random.random_sample(size=a.size).astype(np.float32) - -a['x'] = np.random.random_sample(size=a.size) -a['y'] = np.random.random_sample(size=a.size) -a['z'] = np.random.random_sample(size=a.size) - -n['x'] = np.random.random_sample(size=n.size) -n['y'] = np.random.random_sample(size=n.size) -n['z'] = np.random.random_sample(size=n.size) - -a['x'] = np.ones(a.size) -a['y'] = np.zeros(a.size) -a['z'] = np.zeros(a.size) - -n['x'] = np.zeros(n.size) -n['y'] = np.zeros(n.size) -n['z'] = np.ones(n.size) - -phi = np.array([np.pi/2]*a.size).astype(np.float32) - -def testrotate(): - dest = np.empty(a.size, dtype=float3) - rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size) - for v, theta, w, rdest in zip(a,phi,n,dest): - r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z'])) - if not np.allclose(rdest['x'], r[0]) or \ - not np.allclose(rdest['y'], r[1]) or \ - not np.allclose(rdest['z'], r[2]): - print v - print theta - print w - print r - print rdest - assert False - diff --git a/tests/test_fileio.py b/tests/test_fileio.py deleted file mode 100644 index 3869a9f..0000000 --- a/tests/test_fileio.py +++ /dev/null @@ -1,74 +0,0 @@ -import unittest -from chroma.fileio import root -from chroma import event -import numpy as np - -class TestFileIO(unittest.TestCase): - def test_file_write_and_read(self): - ev = event.Event(1, event.Vertex('e-', pos=(0,0,1), dir=(1,0,0), - ke=15.0, pol=(0,1,0))) - - photons_beg = root.make_photon_with_arrays(1) - photons_beg.pos[0] = (1,2,3) - photons_beg.dir[0] = (4,5,6) - photons_beg.pol[0] = (7,8,9) - photons_beg.wavelengths[0] = 400.0 - photons_beg.t[0] = 100.0 - photons_beg.last_hit_triangles[0] = 5 - photons_beg.flags[0] = 20 - ev.photons_beg = photons_beg - - photons_end = root.make_photon_with_arrays(1) - photons_end.pos[0] = (1,2,3) - photons_end.dir[0] = (4,5,6) - photons_end.pol[0] = (7,8,9) - photons_end.wavelengths[0] = 400.0 - photons_end.t[0] = 100.0 - photons_end.last_hit_triangles[0] = 5 - photons_end.flags[0] = 20 - ev.photons_end = photons_end - - ev.vertices = [ev.primary_vertex] - - channels = event.Channels(hit=np.array([True, False]), - t=np.array([20.0, 1e9], dtype=np.float32), - q=np.array([2.0, 0.0], dtype=np.float32), - flags=np.array([8, 32], dtype=np.uint32)) - ev.channels = channels - - filename = '/tmp/chroma-filewritertest.root' - writer = root.RootWriter(filename) - writer.write_event(ev) - writer.close() - - # Exercise the RootReader methods - reader = root.RootReader(filename) - self.assertEquals(len(reader), 1) - - self.assertRaises(StopIteration, reader.prev) - - reader.next() - - self.assertEqual(reader.index(), 0) - self.assertRaises(StopIteration, reader.next) - - reader.jump_to(0) - - # Enough screwing around, let's get the one event in the file - newev = reader.current() - - # Now check if everything is correct in the event - for attribute in ['id']: - self.assertEqual(getattr(ev, attribute), getattr(newev, attribute), 'compare %s' % attribute) - - for attribute in ['pos', 'dir', 'pol', 'ke', 't0']: - self.assertTrue(np.allclose(getattr(ev.primary_vertex, attribute), getattr(newev.primary_vertex, attribute)), 'compare %s' % attribute) - - for i in range(len(ev.vertices)): - self.assertTrue(np.allclose(getattr(ev.vertices[i], attribute), getattr(newev.vertices[i], attribute)), 'compare %s' % attribute) - - for attribute in ['pos', 'dir', 'pol', 'wavelengths', 't', 'last_hit_triangles', 'flags']: - self.assertTrue(np.allclose(getattr(ev.photons_beg, attribute), - getattr(newev.photons_beg, attribute)), 'compare %s' % attribute) - self.assertTrue(np.allclose(getattr(ev.photons_end, attribute), - getattr(newev.photons_end, attribute)), 'compare %s' % attribute) diff --git a/tests/test_generator_photon.py b/tests/test_generator_photon.py deleted file mode 100644 index 13501fe..0000000 --- a/tests/test_generator_photon.py +++ /dev/null @@ -1,21 +0,0 @@ -import unittest -import itertools - -from chroma import generator -from chroma.generator.vertex import constant_particle_gun -from chroma import optics - -class TestG4ParallelGenerator(unittest.TestCase): - def test_center(self): - '''Generate Cherenkov light at the center of the world volume''' - gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim) - vertex = itertools.islice(constant_particle_gun('e-', (0,0,0), (1,0,0), 100), 10) - for event in gen.generate_events(vertex): - self.assertGreater(len(event.photons_beg.pos), 0) - - def test_off_center(self): - '''Generate Cherenkov light at (1 m, 0 m, 0 m)''' - gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim) - vertex = itertools.islice(constant_particle_gun('e-', (1,0,0), (1,0,0), 100), 10) - for event in gen.generate_events(vertex): - self.assertGreater(len(event.photons_beg.pos), 0) diff --git a/tests/test_generator_vertex.py b/tests/test_generator_vertex.py deleted file mode 100644 index cec363d..0000000 --- a/tests/test_generator_vertex.py +++ /dev/null @@ -1,23 +0,0 @@ -import unittest -import itertools -import numpy as np -import chroma.generator.vertex - -class TestParticleGun(unittest.TestCase): - def test_constant_particle_gun_center(self): - '''Generate electron vertices at the center of the world volume.''' - vertex = chroma.generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) - for ev in itertools.islice(vertex, 100): - self.assertEquals(ev.primary_vertex.particle_name, 'e-') - self.assertTrue(np.allclose(ev.primary_vertex.pos, [0,0,0])) - self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0])) - self.assertTrue(np.allclose(ev.primary_vertex.ke, 100)) - - def test_off_center(self): - '''Generate electron vertices at (1,0,0) in the world volume.''' - vertex = chroma.generator.vertex.constant_particle_gun('e-', (1,0,0), (1,0,0), 100) - for ev in itertools.islice(vertex, 100): - self.assertEquals(ev.primary_vertex.particle_name, 'e-') - self.assertTrue(np.allclose(ev.primary_vertex.pos, [1,0,0])) - self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0])) - self.assertTrue(np.allclose(ev.primary_vertex.ke, 100)) diff --git a/tests/test_pdf.py b/tests/test_pdf.py deleted file mode 100644 index 571cbd4..0000000 --- a/tests/test_pdf.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest -import numpy as np -import itertools - -import chroma.detectors -from chroma.generator.photon import G4ParallelGenerator -from chroma.generator.vertex import constant_particle_gun -from chroma.optics import water_wcsim -from chroma import gpu -from chroma.sim import Simulation - -class TestPDF(unittest.TestCase): - def setUp(self): - self.detector = chroma.detectors.microlbne() - self.detector.build() - self.vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 10) - - def testGPUPDF(self): - '''Create a hit count and (q,t) PDF for 10 MeV events in MicroLBNE''' - - g4generator = G4ParallelGenerator(1, water_wcsim) - - context = gpu.create_cuda_context() - - gpu_geometry = gpu.GPUGeometry(self.detector) - - nthreads_per_block, max_blocks = 64, 1024 - - rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) - - gpu_daq = gpu.GPUDaq(gpu_geometry, max(self.detector.pmtids)) - gpu_pdf = gpu.GPUPDF() - gpu_pdf.setup_pdf(max(self.detector.pmtids), 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - - gpu_pdf.clear_pdf() - - for ev in g4generator.generate_events(itertools.islice(self.vertex_gen, 10)): - gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block, - max_blocks) - gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, - nthreads_per_block, max_blocks) - gpu_pdf.add_hits_to_pdf(gpu_channels) - - hitcount, pdf = gpu_pdf.get_pdfs() - self.assertTrue( (hitcount > 0).any() ) - self.assertTrue( (pdf > 0).any() ) - - # Consistency checks - for i, nhits in enumerate(hitcount): - self.assertEqual(nhits, pdf[i].sum()) - - context.pop() - - def testSimPDF(self): - sim = Simulation(self.detector) - - vertex_iter = itertools.islice(self.vertex_gen, 10) - - hitcount, pdf = sim.create_pdf(vertex_iter, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - - self.assertTrue( (hitcount > 0).any() ) - self.assertTrue( (pdf > 0).any() ) - - # Consistency checks - for i, nhits in enumerate(hitcount): - self.assertEqual(nhits, pdf[i].sum()) diff --git a/tests/test_propagation.py b/tests/test_propagation.py deleted file mode 100644 index 43ecb9b..0000000 --- a/tests/test_propagation.py +++ /dev/null @@ -1,58 +0,0 @@ -import unittest -import numpy as np - -from chroma.geometry import Solid, Geometry -from chroma.make import box -from chroma.sim import Simulation -from chroma.optics import vacuum -from chroma.event import Photons - -class TestPropagation(unittest.TestCase): - def testAbort(self): - '''Photons that hit a triangle at normal incidence should not abort. - - Photons that hit a triangle at exactly normal incidence can sometimes - produce a dot product that is outside the range allowed by acos(). - Trigger these with axis aligned photons in a box. - ''' - - # Setup geometry - cube = Geometry(vacuum) - cube.add_solid(Solid(box(100,100,100), vacuum, vacuum)) - cube.pmtids = [0] - cube.build(use_cache=False) - sim = Simulation(cube, geant4_processes=0) - - # Create initial photons - nphotons = 10000 - pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32) - dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32) - pol = np.zeros_like(pos) - phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32) - pol[:,0] = np.cos(phi) - pol[:,1] = np.sin(phi) - t = np.zeros(nphotons, dtype=np.float32) - wavelengths = np.empty(nphotons, np.float32) - wavelengths.fill(400.0) - - photons = Photons(pos=pos, dir=dir, pol=pol, t=t, - wavelengths=wavelengths) - - # First make one step to check for strangeness - photons_end = sim.simulate([photons], keep_photons_end=True, - max_steps=1).next().photons_end - - self.assertFalse(np.isnan(photons_end.pos).any()) - self.assertFalse(np.isnan(photons_end.dir).any()) - self.assertFalse(np.isnan(photons_end.pol).any()) - self.assertFalse(np.isnan(photons_end.t).any()) - self.assertFalse(np.isnan(photons_end.wavelengths).any()) - - # Now let it run the usual ten steps - photons_end = sim.simulate([photons], keep_photons_end=True, - max_steps=10).next().photons_end - aborted = (photons_end.flags & (1 << 31)) > 0 - print 'aborted photons: %1.1f' % \ - (float(np.count_nonzero(aborted)) / nphotons) - self.assertFalse(aborted.any()) - diff --git a/tests/test_ray_intersection.py b/tests/test_ray_intersection.py deleted file mode 100644 index 7d0c53c..0000000 --- a/tests/test_ray_intersection.py +++ /dev/null @@ -1,27 +0,0 @@ -import unittest -import chroma -import numpy as np -import os -from pycuda import gpuarray as ga - -class TestRayIntersection(unittest.TestCase): - def setUp(self): - self.context = chroma.gpu.create_cuda_context() - self.module = chroma.gpu.get_cu_module('mesh.h') - self.gpu_funcs = chroma.gpu.GPUFuncs(self.module) - self.box = chroma.gpu.GPUGeometry(chroma.build(chroma.make.cube())) - - pos, dir = chroma.project.from_film() - self.pos_gpu = ga.to_gpu(chroma.gpu.to_float3(pos)) - self.dir_gpu = ga.to_gpu(chroma.gpu.to_float3(dir)) - - testdir = os.path.dirname(os.path.abspath(chroma.tests.__file__)) - self.dx_standard = np.load(os.path.join(testdir, - 'data/ray_intersection.npz')) - def test_intersection_distance(self): - dx = ga.zeros(self.pos_gpu.size, dtype=np.float32) - self.gpu_funcs.distance_to_mesh(np.int32(self.pos_gpu.size), self.pos_gpu, self.dir_gpu, self.box.gpudata, dx, block=(64,1,1), grid=(self.pos_gpu.size//64+1,1)) - self.assertTrue((dx.get() == self.dx_standard).all()) - - def tearDown(self): - self.context.pop() diff --git a/tests/test_rayleigh.py b/tests/test_rayleigh.py deleted file mode 100644 index 02ccb41..0000000 --- a/tests/test_rayleigh.py +++ /dev/null @@ -1,57 +0,0 @@ -import unittest -import numpy as np - -from chroma.geometry import Solid, Geometry -from chroma.make import box -from chroma.sim import Simulation -from chroma.optics import water_wcsim -from chroma.event import Photons -import histogram -from histogram.root import rootify -import ROOT -ROOT.gROOT.SetBatch(1) - -class TestRayleigh(unittest.TestCase): - def setUp(self): - self.cube = Geometry(water_wcsim) - self.cube.add_solid(Solid(box(100,100,100), water_wcsim, water_wcsim)) - self.cube.pmtids = [0] - self.cube.build(use_cache=False) - self.sim = Simulation(self.cube, geant4_processes=0) - - nphotons = 100000 - pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32) - dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32) - pol = np.zeros_like(pos) - phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32) - pol[:,0] = np.cos(phi) - pol[:,1] = np.sin(phi) - t = np.zeros(nphotons, dtype=np.float32) - wavelengths = np.empty(nphotons, np.float32) - wavelengths.fill(400.0) - - self.photons = Photons(pos=pos, dir=dir, pol=pol, t=t, wavelengths=wavelengths) - - def testAngularDistributionPolarized(self): - # Fully polarized photons - self.photons.pol[:] = [1.0, 0.0, 0.0] - - photons_end = self.sim.simulate([self.photons], keep_photons_end=True, max_steps=1).next().photons_end - aborted = (photons_end.flags & (1 << 31)) > 0 - self.assertFalse(aborted.any()) - - # Compute the dot product between initial and final dir - rayleigh_scatters = (photons_end.flags & (1 << 4)) > 0 - cos_scatter = (self.photons.dir[rayleigh_scatters] * photons_end.dir[rayleigh_scatters]).sum(axis=1) - theta_scatter = np.arccos(cos_scatter) - h = histogram.Histogram(bins=100, range=(0, np.pi)) - h.fill(theta_scatter) - h = rootify(h) - - # The functional form for polarized light should be - # (1 + \cos^2 \theta)\sin \theta according to GEANT4 physics - # reference manual. - f = ROOT.TF1("pol_func", "[0]*(1+cos(x)**2)*sin(x)", 0, np.pi) - h.Fit(f) - self.assertGreater(f.GetProb(), 1e-3) - diff --git a/tests/test_sample_cdf.cu b/tests/test_sample_cdf.cu deleted file mode 100644 index 1401772..0000000 --- a/tests/test_sample_cdf.cu +++ /dev/null @@ -1,16 +0,0 @@ -// -*-c++-*- -#include "random.h" - -extern "C" { - -__global__ void test_sample_cdf(int offset, int ncdf, - float *cdf_x, float *cdf_y, float *out) -{ - int id = blockDim.x * blockIdx.x + threadIdx.x; - curandState s; - curand_init(0, id, offset, &s); - - out[id] = sample_cdf(&s, ncdf, cdf_x, cdf_y); -} - -} diff --git a/tests/test_sample_cdf.py b/tests/test_sample_cdf.py deleted file mode 100644 index 2baa2ac..0000000 --- a/tests/test_sample_cdf.py +++ /dev/null @@ -1,64 +0,0 @@ -import pycuda.autoinit -import pycuda.driver as cuda -from pycuda.compiler import SourceModule -from pycuda import gpuarray -import numpy as np -import ROOT -import os - -current_directory = os.path.split(os.path.realpath(__file__))[0] -source_directory = current_directory + '/../src' - -source = open(current_directory + '/test_sample_cdf.cu').read() - -mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) - -test_sample_cdf = mod.get_function('test_sample_cdf') - -def compare_sampling(hist, reps=10): - nbins = hist.GetNbinsX(); - xaxis = hist.GetXaxis() - intg = hist.GetIntegral() - cdf_y = np.empty(nbins+1, dtype=float) - cdf_x = np.empty_like(cdf_y) - - cdf_x[0] = xaxis.GetBinLowEdge(1) - cdf_y[0] = 0.0 - for i in xrange(1,len(cdf_x)): - cdf_y[i] = intg[i] - cdf_x[i] = xaxis.GetBinUpEdge(i) - - cdf_x_gpu = gpuarray.to_gpu(cdf_x.astype(np.float32)) - cdf_y_gpu = gpuarray.to_gpu(cdf_y.astype(np.float32)) - block =(128,1,1) - grid = (128, 1) - out_gpu = gpuarray.empty(shape=int(block[0]*grid[0]), dtype=np.float32) - - out_h = ROOT.TH1D('out_h', '', hist.GetNbinsX(), - xaxis.GetXmin(), - xaxis.GetXmax()) - out_h.SetLineColor(ROOT.kGreen) - - for i in xrange(reps): - test_sample_cdf(np.int32(i), - np.int32(len(cdf_x_gpu)), - cdf_x_gpu, cdf_y_gpu, out_gpu, block=block, grid=grid) - out = out_gpu.get() - for v in out: - out_h.Fill(v) - - prob = out_h.KolmogorovTest(hist) - return prob, out_h - -def test_sampling(): - '''Verify that the CDF-based sampler on the GPU reproduces a binned - Gaussian distribution''' - f = ROOT.TF1('f_gaussian', 'gaus(0)', -5, 5) - f.SetParameters(1.0/np.sqrt(np.pi * 2), 0.0, 1.0) - gaussian = ROOT.TH1D('gaussian', '', 100, -5, 5) - gaussian.Add(f) - - prob, out_h = compare_sampling(gaussian, reps=50) - - assert prob > 0.01 - -- cgit