summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAnthony LaTorre <tlatorre9@gmail.com>2011-09-06 17:18:14 -0400
committerAnthony LaTorre <tlatorre9@gmail.com>2011-09-06 17:18:14 -0400
commit86cb5f26029b3357727b88fee3fa7dd799a8d6c8 (patch)
treefc915556ef813cf4ad59edeb7b59fee0e3244c34 /src
parent3d3140071713e523516793258e3c904ca162d7b7 (diff)
downloadchroma-86cb5f26029b3357727b88fee3fa7dd799a8d6c8.tar.gz
chroma-86cb5f26029b3357727b88fee3fa7dd799a8d6c8.tar.bz2
chroma-86cb5f26029b3357727b88fee3fa7dd799a8d6c8.zip
geometry on the GPU is now a struct created in the GPUGeometry class. coding style for cuda code is now compliant with python PEP 7 -- Style Guide for C Code.
Diffstat (limited to 'src')
-rw-r--r--src/__init__.py7
-rw-r--r--src/alpha.h141
-rw-r--r--src/daq.cu298
-rw-r--r--src/geometry.h74
-rw-r--r--src/hybrid_render.cu202
-rw-r--r--src/intersect.h69
-rw-r--r--src/kernel.cu389
-rw-r--r--src/linalg.h157
-rw-r--r--src/materials.h68
-rw-r--r--src/matrix.h312
-rw-r--r--src/mesh.h273
-rw-r--r--src/photon.h483
-rw-r--r--src/propagate.cu100
-rw-r--r--src/random.h85
-rw-r--r--src/render.cu169
-rw-r--r--src/rotate.h21
-rw-r--r--src/sorting.h134
-rw-r--r--src/tools.cu18
-rw-r--r--src/transform.cu38
19 files changed, 1515 insertions, 1523 deletions
diff --git a/src/__init__.py b/src/__init__.py
index f2f5215..e69de29 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -1,7 +0,0 @@
-from os.path import dirname
-
-with open(dirname(__file__) + '/kernel.cu') as f:
- kernel = f.read()
-
-with open(dirname(__file__) + '/daq.cu') as f:
- daq = f.read()
diff --git a/src/alpha.h b/src/alpha.h
deleted file mode 100644
index ac75834..0000000
--- a/src/alpha.h
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef __ALPHA_H__
-#define __ALPHA_H__
-
-#include "linalg.h"
-#include "intersect.h"
-#include "mesh.h"
-#include "sorting.h"
-
-#include "stdio.h"
-
-#define ALPHA_DEPTH 10
-
-__device__ int get_color_alpha(const float3 &origin, const float3& direction)
-{
- float distance;
-
- if (!intersect_node(origin, direction, g_start_node, -1.0f))
- return 0;
-
- unsigned int stack[STACK_SIZE];
-
- unsigned int *head = &stack[0];
- unsigned int *node = &stack[1];
- unsigned int *tail = &stack[STACK_SIZE-1];
- *node = g_start_node;
-
- unsigned int i;
-
- float distances[ALPHA_DEPTH];
- unsigned int indices[ALPHA_DEPTH];
- unsigned int n=0;
-
- do
- {
- unsigned int first_child = tex1Dfetch(node_map, *node);
- unsigned int stop = tex1Dfetch(node_map_end, *node);
-
- while (*node >= g_first_node && stop == first_child+1)
- {
- *node = first_child;
- first_child = tex1Dfetch(node_map, *node);
- stop = tex1Dfetch(node_map_end, *node);
- }
-
- if (*node >= g_first_node)
- {
- for (i=first_child; i < stop; i++)
- {
- if (intersect_node(origin, direction, i, -1.0f))
- {
- *node = i;
- node++;
- }
- }
-
- node--;
- }
- else // node is a leaf
- {
- for (i=first_child; i < stop; i++)
- {
- uint4 triangle_data = g_triangles[i];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- if (intersect_triangle(origin, direction, v0, v1, v2, distance))
- {
- if (n < 1)
- {
- distances[0] = distance;
- indices[0] = i;
- }
- else
- {
- unsigned long j = searchsorted(n, distances, distance);
-
- if (j <= ALPHA_DEPTH-1)
- {
- insert(ALPHA_DEPTH, distances, j, distance);
- insert(ALPHA_DEPTH, indices, j, i);
- }
- }
-
- if (n < ALPHA_DEPTH)
- n++;
- }
-
- } // triangle loop
-
- node--;
-
- } // node is a leaf
-
- } // while loop
- while (node != head);
-
- if (n < 1)
- return 0;
-
- float scale = 1.0f;
- float fr = 0.0f;
- float fg = 0.0f;
- float fb = 0.0f;
- unsigned int index;
- for (i=0; i < n; i++)
- {
- index = indices[i];
-
- uint4 triangle_data = g_triangles[index];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- float cos_theta = dot(normalize(cross(v1-v0,v2-v1)),-direction);
-
- if (cos_theta < 0.0f)
- cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction);
-
- unsigned int r0 = 0xff & (g_colors[index] >> 16);
- unsigned int g0 = 0xff & (g_colors[index] >> 8);
- unsigned int b0 = 0xff & g_colors[index];
-
- float alpha = (255 - (0xff & (g_colors[index] >> 24)))/255.0f;
-
- fr += r0*scale*cos_theta*alpha;
- fg += g0*scale*cos_theta*alpha;
- fb += b0*scale*cos_theta*alpha;
-
- scale *= (1.0f-alpha);
- }
- unsigned int r = floorf(fr);
- unsigned int g = floorf(fg);
- unsigned int b = floorf(fb);
-
- return r << 16 | g << 8 | b;
-}
-
-#endif
diff --git a/src/daq.cu b/src/daq.cu
index cb53d56..5a68846 100644
--- a/src/daq.cu
+++ b/src/daq.cu
@@ -1,208 +1,198 @@
// -*-c++-*-
#include <curand_kernel.h>
-__device__ unsigned int float_to_sortable_int(float f)
+__device__ unsigned int
+float_to_sortable_int(float f)
{
- return __float_as_int(f);
- //int i = __float_as_int(f);
- //unsigned int mask = -(int)(i >> 31) | 0x80000000;
- //return i ^ mask;
+ return __float_as_int(f);
+ //int i = __float_as_int(f);
+ //unsigned int mask = -(int)(i >> 31) | 0x80000000;
+ //return i ^ mask;
}
-__device__ float sortable_int_to_float(unsigned int i)
+__device__ float
+sortable_int_to_float(unsigned int i)
{
- return __int_as_float(i);
- //unsigned int mask = ((i >> 31) - 1) | 0x80000000;
- //return __int_as_float(i ^ mask);
+ return __int_as_float(i);
+ //unsigned int mask = ((i >> 31) - 1) | 0x80000000;
+ //return __int_as_float(i ^ mask);
}
extern "C"
{
-__global__ void reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints)
+__global__ void
+reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints)
{
- int id = threadIdx.x + blockDim.x * blockIdx.x;
- if (id < ntime_ints) {
- unsigned int maxtime_int = float_to_sortable_int(maxtime);
- time_ints[id] = maxtime_int;
- }
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+ if (id < ntime_ints) {
+ unsigned int maxtime_int = float_to_sortable_int(maxtime);
+ time_ints[id] = maxtime_int;
+ }
}
-__global__ void run_daq(curandState *s, unsigned int detection_state,
- float time_rms,
- int first_photon,
- int nphotons, float *photon_times,
- unsigned int *photon_histories,
- int *last_hit_triangles, int *solid_map,
- int nsolids, unsigned int *earliest_time_int,
- unsigned int *channel_q,
- unsigned int *channel_histories)
+__global__ void
+run_daq(curandState *s, unsigned int detection_state, float time_rms,
+ int first_photon, int nphotons, float *photon_times,
+ unsigned int *photon_histories, int *last_hit_triangles,
+ int *solid_map, int nsolids, unsigned int *earliest_time_int,
+ unsigned int *channel_q, unsigned int *channel_histories)
{
- int id = threadIdx.x + blockDim.x * blockIdx.x;
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
- if (id < nphotons)
- {
- curandState rng = s[id];
- int photon_id = id + first_photon;
- int triangle_id = last_hit_triangles[photon_id];
+ if (id < nphotons) {
+ curandState rng = s[id];
+ int photon_id = id + first_photon;
+ int triangle_id = last_hit_triangles[photon_id];
- if (triangle_id > -1)
- {
- int solid_id = solid_map[triangle_id];
- unsigned int history = photon_histories[photon_id];
-
- if (solid_id < nsolids && (history & detection_state))
- {
- float time = photon_times[photon_id] + curand_normal(&rng) * time_rms;
- unsigned int time_int = float_to_sortable_int(time);
-
- atomicMin(earliest_time_int + solid_id, time_int);
- atomicAdd(channel_q + solid_id, 1);
- atomicOr(channel_histories + solid_id, history);
- }
-
- }
+ if (triangle_id > -1) {
+ int solid_id = solid_map[triangle_id];
+ unsigned int history = photon_histories[photon_id];
- s[id] = rng;
+ if (solid_id < nsolids && (history & detection_state)) {
+ float time = photon_times[photon_id] + curand_normal(&rng) * time_rms;
+ unsigned int time_int = float_to_sortable_int(time);
+
+ atomicMin(earliest_time_int + solid_id, time_int);
+ atomicAdd(channel_q + solid_id, 1);
+ atomicOr(channel_histories + solid_id, history);
+ }
}
+ s[id] = rng;
+
+ }
+
}
-__global__ void convert_sortable_int_to_float(int n,
- unsigned int *sortable_ints,
- float *float_output)
+__global__ void
+convert_sortable_int_to_float(int n, unsigned int *sortable_ints,
+ float *float_output)
{
- int id = threadIdx.x + blockDim.x * blockIdx.x;
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
- if (id < n)
- float_output[id] = sortable_int_to_float(sortable_ints[id]);
+ if (id < n)
+ float_output[id] = sortable_int_to_float(sortable_ints[id]);
}
-__global__ void bin_hits(int nchannels,
- unsigned int *channel_q, float *channel_time,
- unsigned int *hitcount,
- int tbins, float tmin, float tmax,
- int qbins, float qmin, float qmax,
- unsigned int *pdf)
+__global__ void
+bin_hits(int nchannels, unsigned int *channel_q, float *channel_time,
+ unsigned int *hitcount, int tbins, float tmin, float tmax, int qbins,
+ float qmin, float qmax, unsigned int *pdf)
{
- int id = threadIdx.x + blockDim.x * blockIdx.x;
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
- if (id >= nchannels)
- return;
+ if (id >= nchannels)
+ return;
- unsigned int q = channel_q[id];
- float t = channel_time[id];
+ unsigned int q = channel_q[id];
+ float t = channel_time[id];
- if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) {
- hitcount[id] += 1;
+ if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) {
+ hitcount[id] += 1;
- int tbin = (t - tmin) / (tmax - tmin) * tbins;
- int qbin = (q - qmin) / (qmax - qmin) * qbins;
+ int tbin = (t - tmin) / (tmax - tmin) * tbins;
+ int qbin = (q - qmin) / (qmax - qmin) * qbins;
- // row major order (channel, t, q)
- int bin = id * (tbins * qbins) + tbin * qbins + qbin;
- pdf[bin] += 1;
- }
+ // row major order (channel, t, q)
+ int bin = id * (tbins * qbins) + tbin * qbins + qbin;
+ pdf[bin] += 1;
+ }
}
-__global__ void accumulate_pdf_eval(int time_only,
- int nchannels,
- unsigned int *event_hit,
- float *event_time,
- float *event_charge,
- float *mc_time,
- unsigned int *mc_charge, // quirk of DAQ!
- unsigned int *hitcount,
- unsigned int *bincount,
- float min_twidth, float tmin, float tmax,
- float min_qwidth, float qmin, float qmax,
- float *nearest_mc,
- int min_bin_content)
+__global__ void
+accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit,
+ float *event_time, float *event_charge, float *mc_time,
+ unsigned int *mc_charge, // quirk of DAQ!
+ unsigned int *hitcount, unsigned int *bincount,
+ float min_twidth, float tmin, float tmax,
+ float min_qwidth, float qmin, float qmax,
+ float *nearest_mc, int min_bin_content)
{
- int id = threadIdx.x + blockDim.x * blockIdx.x;
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
- if (id >= nchannels)
- return;
+ if (id >= nchannels)
+ return;
- // Was this channel hit in the Monte Carlo?
- float channel_mc_time = mc_time[id];
- if (channel_mc_time >= 1e8)
- return; // Nothing else to do
+ // Was this channel hit in the Monte Carlo?
+ float channel_mc_time = mc_time[id];
+ if (channel_mc_time >= 1e8)
+ return; // Nothing else to do
- // Is this channel inside the range of the PDF?
- float distance;
- int channel_bincount = bincount[id];
- if (time_only) {
- if (channel_mc_time < tmin || channel_mc_time > tmax)
- return; // Nothing else to do
+ // Is this channel inside the range of the PDF?
+ float distance;
+ int channel_bincount = bincount[id];
+ if (time_only) {
+ if (channel_mc_time < tmin || channel_mc_time > tmax)
+ return; // Nothing else to do
- // This MC information is contained in the PDF
- hitcount[id] += 1;
+ // This MC information is contained in the PDF
+ hitcount[id] += 1;
- // Was this channel hit in the event-of-interest?
- int channel_event_hit = event_hit[id];
- if (!channel_event_hit)
- return; // No need to update PDF value for unhit channel
+ // Was this channel hit in the event-of-interest?
+ int channel_event_hit = event_hit[id];
+ if (!channel_event_hit)
+ return; // No need to update PDF value for unhit channel
- // Are we inside the minimum size bin?
- float channel_event_time = event_time[id];
- distance = fabsf(channel_mc_time - channel_event_time);
- if (distance < min_twidth/2.0)
- bincount[id] = channel_bincount + 1;
-
- } else { // time and charge PDF
- float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer
+ // Are we inside the minimum size bin?
+ float channel_event_time = event_time[id];
+ distance = fabsf(channel_mc_time - channel_event_time);
+ if (distance < min_twidth/2.0)
+ bincount[id] = channel_bincount + 1;
+
+ }
+ else { // time and charge PDF
+ float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer
- if (channel_mc_time < tmin || channel_mc_time > tmax ||
- channel_mc_charge < qmin || channel_mc_charge > qmax)
- return; // Nothing else to do
+ if (channel_mc_time < tmin || channel_mc_time > tmax ||
+ channel_mc_charge < qmin || channel_mc_charge > qmax)
+ return; // Nothing else to do
- // This MC information is contained in the PDF
- hitcount[id] += 1;
+ // This MC information is contained in the PDF
+ hitcount[id] += 1;
- // Was this channel hit in the event-of-interest?
- int channel_event_hit = event_hit[id];
- if (!channel_event_hit)
- return; // No need to update PDF value for unhit channel
+ // Was this channel hit in the event-of-interest?
+ int channel_event_hit = event_hit[id];
+ if (!channel_event_hit)
+ return; // No need to update PDF value for unhit channel
- // Are we inside the minimum size bin?
- float channel_event_time = event_time[id];
- float channel_event_charge = event_charge[id];
- float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0;
- float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0;
- distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance);
-
- if (distance < 1.0f)
- bincount[id] = channel_bincount + 1;
- }
-
- // Do we need to keep updating the nearest_mc list?
- if (channel_bincount + 1 >= min_bin_content)
- return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value
-
- // insertion sort the distance into the array of nearest MC points
- int offset = min_bin_content * id;
- int entry = min_bin_content - 1;
+ // Are we inside the minimum size bin?
+ float channel_event_time = event_time[id];
+ float channel_event_charge = event_charge[id];
+ float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0;
+ float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0;
+ distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance);
+
+ if (distance < 1.0f)
+ bincount[id] = channel_bincount + 1;
+ }
+
+ // Do we need to keep updating the nearest_mc list?
+ if (channel_bincount + 1 >= min_bin_content)
+ return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value
+
+ // insertion sort the distance into the array of nearest MC points
+ int offset = min_bin_content * id;
+ int entry = min_bin_content - 1;
- // If last entry less than new entry, nothing to update
- if (distance > nearest_mc[offset + entry])
- return;
-
- // Find where to insert the new entry while sliding the rest
- // to the right
+ // If last entry less than new entry, nothing to update
+ if (distance > nearest_mc[offset + entry])
+ return;
+
+ // Find where to insert the new entry while sliding the rest
+ // to the right
+ entry--;
+ while (entry >= 0) {
+ if (nearest_mc[offset+entry] >= distance)
+ nearest_mc[offset+entry+1] = nearest_mc[offset+entry];
+ else
+ break;
entry--;
- while (entry >= 0) {
- if (nearest_mc[offset+entry] >= distance)
- nearest_mc[offset+entry+1] = nearest_mc[offset+entry];
- else
- break;
- entry--;
- }
+ }
- nearest_mc[offset+entry+1] = distance;
+ nearest_mc[offset+entry+1] = distance;
}
-
} // extern "C"
diff --git a/src/geometry.h b/src/geometry.h
new file mode 100644
index 0000000..2b5eacb
--- /dev/null
+++ b/src/geometry.h
@@ -0,0 +1,74 @@
+#ifndef __GEOMETRY_H__
+#define __GEOMETRY_H__
+
+struct Material
+{
+ float *refractive_index;
+ float *absorption_length;
+ float *scattering_length;
+ unsigned int n;
+ float step;
+ float wavelength0;
+};
+
+struct Surface
+{
+ float *detect;
+ float *absorb;
+ float *reflect_diffuse;
+ float *reflect_specular;
+ unsigned int n;
+ float step;
+ float wavelength0;
+};
+
+struct Triangle
+{
+ float3 v0, v1, v2;
+};
+
+struct Geometry
+{
+ float3 *vertices;
+ uint3 *triangles;
+ unsigned int *material_codes;
+ unsigned int *colors;
+ float3 *lower_bounds;
+ float3 *upper_bounds;
+ unsigned int *node_map;
+ unsigned int *node_map_end;
+ Material **materials;
+ Surface **surfaces;
+ unsigned int start_node;
+ unsigned int first_node;
+};
+
+__device__ Triangle
+get_triangle(Geometry *geometry, const unsigned int &i)
+{
+ uint3 triangle_data = geometry->triangles[i];
+
+ Triangle triangle;
+ triangle.v0 = geometry->vertices[triangle_data.x];
+ triangle.v1 = geometry->vertices[triangle_data.y];
+ triangle.v2 = geometry->vertices[triangle_data.z];
+
+ return triangle;
+}
+
+template <class T>
+__device__ float
+interp_property(T *m, const float &x, const float *fp)
+{
+ if (x < m->wavelength0)
+ return fp[0];
+
+ if (x > (m->wavelength0 + (m->n-1)*m->step))
+ return fp[m->n-1];
+
+ int jl = (x-m->wavelength0)/m->step;
+
+ return fp[jl] + (x-(m->wavelength0 + jl*m->step))*(fp[jl+1]-fp[jl])/m->step;
+}
+
+#endif
diff --git a/src/hybrid_render.cu b/src/hybrid_render.cu
new file mode 100644
index 0000000..29edefa
--- /dev/null
+++ b/src/hybrid_render.cu
@@ -0,0 +1,202 @@
+//-*-c-*-
+#include <math_constants.h>
+#include <curand_kernel.h>
+
+#include "linalg.h"
+#include "matrix.h"
+#include "rotate.h"
+#include "mesh.h"
+#include "geometry.h"
+#include "photon.h"
+
+__device__ void
+fAtomicAdd(float *addr, float data)
+{
+ while (data)
+ data = atomicExch(addr, data+atomicExch(addr, 0.0f));
+}
+
+__device__ void
+to_diffuse(Photon &p, State &s, Geometry *g, curandState &rng, int max_steps)
+{
+ int steps = 0;
+ while (steps < max_steps) {
+ steps++;
+
+ int command;
+
+ fill_state(s, p, g);
+
+ if (p.last_hit_triangle == -1)
+ break;
+
+ command = propagate_to_boundary(p, s, rng);
+
+ if (command == BREAK)
+ break;
+
+ if (command == CONTINUE)
+ continue;
+
+ if (s.surface_index != -1) {
+ command = propagate_at_surface(p, s, rng, g);
+
+ if (p.history & REFLECT_DIFFUSE)
+ break;
+
+ if (command == BREAK)
+ break;
+
+ if (command == CONTINUE)
+ continue;
+ }
+
+ propagate_at_boundary(p, s, rng);
+
+ } // while (steps < max_steps)
+
+} // to_diffuse
+
+extern "C"
+{
+
+__global__ void
+update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position,
+ curandState *rng_states, float wavelength, float3 xyz,
+ float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps,
+ Geometry *g)
+{
+ int kernel_id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = kernel_id + offset;
+
+ if (kernel_id >= nthreads || id >= total_threads)
+ return;
+
+ curandState rng = rng_states[kernel_id];
+
+ Triangle t = get_triangle(g, id);
+
+ float a = curand_uniform(&rng);
+ float b = uniform(&rng, 0.0f, (1.0f - a));
+ float c = 1.0f - a - b;
+
+ float3 direction = a*t.v0 + b*t.v1 + c*t.v2 - position;
+ direction /= norm(direction);
+
+ float distance;
+ int triangle_index = intersect_mesh(position, direction, g, distance);
+
+ if (triangle_index != id) {
+ rng_states[kernel_id] = rng;
+ return;
+ }
+
+ float3 v01 = t.v1 - t.v0;
+ float3 v12 = t.v2 - t.v1;
+
+ float3 surface_normal = normalize(cross(v01,v12));
+
+ float cos_theta = dot(surface_normal,-direction);
+
+ if (cos_theta < 0.0f)
+ cos_theta = dot(-surface_normal,-direction);
+
+ Photon p;
+ p.position = position;
+ p.direction = direction;
+ p.wavelength = wavelength;
+ p.polarization = uniform_sphere(&rng);
+ p.last_hit_triangle = -1;
+ p.time = 0;
+ p.history = 0;
+
+ State s;
+ to_diffuse(p, s, g, rng, max_steps);
+
+ if (p.history & REFLECT_DIFFUSE) {
+ if (s.inside_to_outside) {
+ fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x);
+ fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y);
+ fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z);
+ }
+ else {
+ fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x);
+ fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y);
+ fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z);
+ }
+ }
+
+ rng_states[kernel_id] = rng;
+
+} // update_xyz_lookup
+
+__global__ void
+update_xyz_image(int nthreads, curandState *rng_states, float3 *positions,
+ float3 *directions, float wavelength, float3 xyz,
+ float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image,
+ int nlookup_calls, int max_steps, Geometry *g)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ curandState rng = rng_states[id];
+
+ Photon p;
+ p.position = positions[id];
+ p.direction = directions[id];
+ p.direction /= norm(p.direction);
+ p.wavelength = wavelength;
+ p.polarization = uniform_sphere(&rng);
+ p.last_hit_triangle = -1;
+ p.time = 0;
+ p.history = 0;
+
+ State s;
+ to_diffuse(p, s, g, rng, max_steps);
+
+ if (p.history & REFLECT_DIFFUSE) {
+ if (s.inside_to_outside)
+ image[id] += xyz*xyz_lookup1[p.last_hit_triangle]/nlookup_calls;
+ else
+ image[id] += xyz*xyz_lookup2[p.last_hit_triangle]/nlookup_calls;
+ }
+
+ rng_states[id] = rng;
+
+} // update_xyz_image
+
+__global__ void
+process_image(int nthreads, float3 *image, unsigned int *pixels, int nimages)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ float3 rgb = image[id]/nimages;
+
+ if (rgb.x < 0.0f)
+ rgb.x = 0.0f;
+ if (rgb.y < 0.0f)
+ rgb.y = 0.0f;
+ if (rgb.z < 0.0f)
+ rgb.z = 0.0f;
+
+ if (rgb.x > 1.0f)
+ rgb.x = 1.0f;
+ if (rgb.y > 1.0f)
+ rgb.y = 1.0f;
+ if (rgb.z > 1.0f)
+ rgb.z = 1.0f;
+
+ unsigned int r = floorf(rgb.x*255.0f);
+ unsigned int g = floorf(rgb.y*255.0f);
+ unsigned int b = floorf(rgb.z*255.0f);
+
+ pixels[id] = 255 << 24 | r << 16 | g << 8 | b;
+
+} // process_image
+
+} // extern "c"
diff --git a/src/intersect.h b/src/intersect.h
index 26e1d7e..978bde8 100644
--- a/src/intersect.h
+++ b/src/intersect.h
@@ -3,30 +3,32 @@
#ifndef __INTERSECT_H__
#define __INTERSECT_H__
-#include <math_constants.h>
-
#include "linalg.h"
#include "matrix.h"
-#include "rotate.h"
+#include "geometry.h"
#define EPSILON 0.0f
-/* Test the intersection between a ray starting from `origin` traveling in the
- direction `direction` and a triangle defined by the vertices `v0`, `v1`, and
- `v2`. If the ray intersects the triangle, set `distance` to the distance
- between `origin` and the intersection and return true, else return false.
-
- `direction` must be normalized. */
-__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float &distance)
+/* Tests the intersection between a ray and a triangle.
+ If the ray intersects the triangle, set `distance` to the distance from
+ `origin` to the intersection and return true, else return false.
+ `direction` must be normalized to one. */
+__device__ bool
+intersect_triangle(const float3 &origin, const float3 &direction,
+ const Triangle &triangle, float &distance)
{
- Matrix m = make_matrix(v1-v0, v2-v0, -direction);
+ float3 m1 = triangle.v1-triangle.v0;
+ float3 m2 = triangle.v2-triangle.v0;
+ float3 m3 = -direction;
+
+ Matrix m = make_matrix(m1, m2, m3);
float determinant = det(m);
if (determinant == 0.0f)
return false;
- float3 b = origin-v0;
+ float3 b = origin-triangle.v0;
float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x +
(m.a02*m.a21 - m.a01*m.a22)*b.y +
@@ -54,38 +56,17 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction
return true;
}
-/* Return the 32 bit color associated with the intersection between a ray
- starting from `origin` traveling in the direction `direction` and the
- plane defined by the points `v0`, `v1`, and `v2` using the cosine of the
- angle between the ray and the plane normal to determine the brightness.
-
- `direction` must be normalized. */
-__device__ unsigned int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2, unsigned int base_color=0xFFFFFF)
-{
- float scale = dot(normalize(cross(v1-v0,v2-v1)),-direction);
-
- if (scale < 0.0f)
- {
- base_color = 0xff0000;
- scale = dot(-normalize(cross(v1-v0,v2-v1)),-direction);
- }
-
- unsigned int r = 0xFF & (base_color >> 16);
- unsigned int g = 0xFF & (base_color >> 8);
- unsigned int b = 0xFF & base_color;
-
- r = floorf(r*scale);
- g = floorf(g*scale);
- b = floorf(b*scale);
-
- return r << 16 | g << 8 | b;
-}
-
-/* Test the intersection between a ray starting from `origin` traveling in the
- direction `direction` and the axis-aligned box defined by the opposite
- vertices `lower_bound` and `upper_bound`. If the ray intersects the box
- return True, else return False. */
-__device__ bool intersect_box(const float3 &origin, const float3 &direction, const float3 &lower_bound, const float3 &upper_bound, float& distance_to_box)
+/* Tests the intersection between a ray and an axis-aligned box defined by
+ an upper and lower bound. If the ray intersects the box, set
+ `distance_to_box` to the distance from `origin` to the intersection and
+ return true, else return false. `direction` must be normalized to one.
+
+ Source: "An Efficient and Robust Ray-Box Intersection Algorithm."
+ by Williams, et. al. */
+__device__ bool
+intersect_box(const float3 &origin, const float3 &direction,
+ const float3 &lower_bound, const float3 &upper_bound,
+ float& distance_to_box)
{
float kmin, kmax, kymin, kymax, kzmin, kzmax;
diff --git a/src/kernel.cu b/src/kernel.cu
deleted file mode 100644
index 4418305..0000000
--- a/src/kernel.cu
+++ /dev/null
@@ -1,389 +0,0 @@
-//-*-c-*-
-#include <math_constants.h>
-#include <curand_kernel.h>
-
-#include "linalg.h"
-#include "matrix.h"
-#include "rotate.h"
-#include "mesh.h"
-#include "photon.h"
-#include "alpha.h"
-
-__device__ void fAtomicAdd(float *addr, float data)
-{
- while (data)
- {
- data = atomicExch(addr, data+atomicExch(addr, 0.0f));
- }
-}
-
-__device__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max_steps)
-{
- int steps = 0;
- while (steps < max_steps)
- {
- steps++;
-
- int command;
-
- fill_state(s, p);
-
- if (p.last_hit_triangle == -1)
- break;
-
- command = propagate_to_boundary(p, s, rng);
-
- if (command == BREAK)
- break;
-
- if (command == CONTINUE)
- continue;
-
- if (s.surface_index != -1)
- {
- command = propagate_at_surface(p, s, rng);
-
- if (p.history & REFLECT_DIFFUSE)
- break;
-
- if (command == BREAK)
- break;
-
- if (command == CONTINUE)
- continue;
- }
-
- propagate_at_boundary(p, s, rng);
-
- } // while (steps < max_steps)
-
-} // to_diffuse
-
-extern "C"
-{
-
-__global__ void update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, curandState *rng_states, float wavelength, float3 xyz, float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps)
-{
- int kernel_id = blockIdx.x*blockDim.x + threadIdx.x;
- int id = kernel_id + offset;
-
- if (kernel_id >= nthreads || id >= total_threads)
- return;
-
- curandState rng = rng_states[kernel_id];
-
- uint4 triangle_data = g_triangles[id];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- float a = curand_uniform(&rng);
- float b = uniform(&rng, 0.0f, (1.0f - a));
- float c = 1.0f - a - b;
-
- float3 direction = a*v0 + b*v1 + c*v2 - position;
- direction /= norm(direction);
-
- float distance;
- int triangle_index = intersect_mesh(position, direction, distance);
-
- if (triangle_index != id)
- {
- rng_states[kernel_id] = rng;
- return;
- }
-
- float cos_theta = dot(normalize(cross(v1-v0,v2-v1)),-direction);
-
- if (cos_theta < 0.0f)
- cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction);
-
- Photon p;
- p.position = position;
- p.direction = direction;
- p.wavelength = wavelength;
- p.polarization = uniform_sphere(&rng);
- p.last_hit_triangle = -1;
- p.time = 0;
- p.history = 0;
-
- State s;
- to_diffuse(p, s, rng, max_steps);
-
- if (p.history & REFLECT_DIFFUSE)
- {
- if (s.inside_to_outside)
- {
- fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x);
- fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y);
- fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z);
- }
- else
- {
- fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x);
- fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y);
- fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z);
- }
- }
-
- rng_states[kernel_id] = rng;
-
-} // update_xyz_lookup
-
-__global__ void update_xyz_image(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float wavelength, float3 xyz, float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image, int nlookup_calls, int max_steps)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- curandState rng = rng_states[id];
-
- Photon p;
- p.position = positions[id];
- p.direction = directions[id];
- p.direction /= norm(p.direction);
- p.wavelength = wavelength;
- p.polarization = uniform_sphere(&rng);
- p.last_hit_triangle = -1;
- p.time = 0;
- p.history = 0;
-
- State s;
- to_diffuse(p, s, rng, max_steps);
-
- if (p.history & REFLECT_DIFFUSE)
- {
- if (s.inside_to_outside)
- {
- image[id].x += xyz.x*xyz_lookup1[p.last_hit_triangle].x/nlookup_calls;
- image[id].y += xyz.y*xyz_lookup1[p.last_hit_triangle].y/nlookup_calls;
- image[id].z += xyz.z*xyz_lookup1[p.last_hit_triangle].z/nlookup_calls;
- }
- else
- {
- image[id].x += xyz.x*xyz_lookup2[p.last_hit_triangle].x/nlookup_calls;
- image[id].y += xyz.y*xyz_lookup2[p.last_hit_triangle].y/nlookup_calls;
- image[id].z += xyz.z*xyz_lookup2[p.last_hit_triangle].z/nlookup_calls;
- }
- }
-
- rng_states[id] = rng;
-
-} // update_xyz_image
-
-__global__ void process_image(int nthreads, float3 *image, int *pixels, int nimages)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- float3 rgb = image[id]/nimages;
-
- if (rgb.x < 0.0f)
- rgb.x = 0.0f;
- if (rgb.y < 0.0f)
- rgb.y = 0.0f;
- if (rgb.z < 0.0f)
- rgb.z = 0.0f;
-
- if (rgb.x > 1.0f)
- rgb.x = 1.0f;
- if (rgb.y > 1.0f)
- rgb.y = 1.0f;
- if (rgb.z > 1.0f)
- rgb.z = 1.0f;
-
- unsigned int r = floorf(rgb.x*255.0f);
- unsigned int g = floorf(rgb.y*255.0f);
- unsigned int b = floorf(rgb.z*255.0f);
-
- pixels[id] = r << 16 | g << 8 | b;
-
-} // process_image
-
-__global__ void distance_to_mesh(int nthreads, float3 *positions, float3 *directions, float *distances)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- float3 position = positions[id];
- float3 direction = directions[id];
- direction /= norm(direction);
-
- float distance;
-
- int triangle_index = intersect_mesh(position, direction, distance);
-
- if (triangle_index == -1)
- distances[id] = 1e9;
- else
- distances[id] = distance;
-
-} // distance_to_mesh
-
-__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- float3 position = positions[id];
- float3 direction = directions[id];
- direction /= norm(direction);
-
- float distance;
-
- int triangle_index = intersect_mesh(position, direction, distance);
-
- if (triangle_index == -1)
- {
- pixels[id] = 0;
- }
- else
- {
- uint4 triangle_data = g_triangles[triangle_index];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- pixels[id] = get_color(direction, v0, v1, v2, g_colors[triangle_index]);
- }
-
-} // ray_trace
-
-/* Trace the rays starting at `positions` traveling in the direction
- `directions` to their intersection with the global mesh. If the ray
- intersects the mesh set the pixel associated with the ray to a 32 bit
- color whose brightness is determined by the cosine of the angle between
- the ray and the normal of the triangle it intersected, else set the pixel
- to 0. */
-__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- float3 position = positions[id];
- float3 direction = directions[id];
- direction /= norm(direction);
-
- bool hit;
- float distance;
-
- pixels[id] = get_color_alpha(position, direction);
-
-} // ray_trace
-
-__global__ void swap(int *values, int nswap, int *offset_a, int *offset_b)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id < nswap) {
- int a = offset_a[id];
- int b = offset_b[id];
-
- int tmp = values[a];
- values[a] = values[b];
- values[b] = tmp;
- }
-}
-
-__global__ void propagate(int first_photon, int nthreads,
- unsigned int *input_queue,
- unsigned int *output_queue,
- curandState *rng_states,
- float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times,
- unsigned int *histories, int *last_hit_triangles, int max_steps)
-{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- curandState rng = rng_states[id];
-
- int photon_id = input_queue[first_photon + id];
-
- Photon p;
- p.position = positions[photon_id];
- p.direction = directions[photon_id];
- p.direction /= norm(p.direction);
- p.polarization = polarizations[photon_id];
- p.polarization /= norm(p.polarization);
- p.wavelength = wavelengths[photon_id];
- p.time = times[photon_id];
- p.last_hit_triangle = last_hit_triangles[photon_id];
- p.history = histories[photon_id];
-
- if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB))
- return;
-
- State s;
-
- int steps = 0;
- while (steps < max_steps)
- {
- steps++;
-
- int command;
-
- // check for NaN and fail
- if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z))
- {
- p.history |= NO_HIT | NAN_ABORT;
- break;
- }
-
- fill_state(s, p);
-
- if (p.last_hit_triangle == -1)
- break;
-
- command = propagate_to_boundary(p, s, rng);
-
- if (command == BREAK)
- break;
-
- if (command == CONTINUE)
- continue;
-
- if (s.surface_index != -1)
- {
- command = propagate_at_surface(p, s, rng);
-
- if (command == BREAK)
- break;
-
- if (command == CONTINUE)
- continue;
- }
-
- propagate_at_boundary(p, s, rng);
-
- } // while (steps < max_steps)
-
- rng_states[id] = rng;
- positions[photon_id] = p.position;
- directions[photon_id] = p.direction;
- polarizations[photon_id] = p.polarization;
- wavelengths[photon_id] = p.wavelength;
- times[photon_id] = p.time;
- histories[photon_id] = p.history;
- last_hit_triangles[photon_id] = p.last_hit_triangle;
-
- // Not done, put photon in output queue
- if ( (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) {
- int out_idx = atomicAdd(output_queue, 1);
- output_queue[out_idx] = photon_id;
- }
-} // propagate
-
-} // extern "c"
diff --git a/src/linalg.h b/src/linalg.h
index fe6b1b2..35b2423 100644
--- a/src/linalg.h
+++ b/src/linalg.h
@@ -1,121 +1,170 @@
#ifndef __LINALG_H__
#define __LINALG_H__
-__device__ __host__ float3 operator- (const float3 &a)
+__device__ float3
+operator- (const float3 &a)
{
- return make_float3(-a.x, -a.y, -a.z);
+ return make_float3(-a.x, -a.y, -a.z);
}
-__device__ __host__ float3 operator+ (const float3 &a, const float3 &b)
+__device__ float3
+operator* (const float3 &a, const float3 &b)
{
- return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
+ return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
}
-__device__ __host__ void operator+= (float3 &a, const float3 &b)
+__device__ float3
+operator/ (const float3 &a, const float3 &b)
{
- a.x += b.x;
- a.y += b.y;
- a.z += b.z;
+ return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
}
-__device__ __host__ float3 operator- (const float3 &a, const float3 &b)
+__device__ void
+operator*= (float3 &a, const float3 &b)
{
- return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
+ a.x *= b.x;
+ a.y *= b.y;
+ a.z *= b.z;
}
-__device__ __host__ void operator-= (float3 &a, const float3 &b)
+__device__ void
+operator/= (float3 &a, const float3 &b)
{
- a.x -= b.x;
- a.y -= b.y;
- a.z -= b.z;
+ a.x /= b.x;
+ a.y /= b.y;
+ a.z /= b.z;
}
-__device__ __host__ float3 operator+ (const float3 &a, const float &c)
+__device__ float3
+operator+ (const float3 &a, const float3 &b)
{
- return make_float3(a.x+c, a.y+c, a.z+c);
+ return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
-__device__ __host__ void operator+= (float3 &a, const float &c)
+__device__ void
+operator+= (float3 &a, const float3 &b)
{
- a.x += c;
- a.y += c;
- a.z += c;
+ a.x += b.x;
+ a.y += b.y;
+ a.z += b.z;
}
-__device__ __host__ float3 operator+ (const float &c, const float3 &a)
+__device__ float3
+operator- (const float3 &a, const float3 &b)
{
- return make_float3(c+a.x, c+a.y, c+a.z);
+ return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
}
-__device__ __host__ float3 operator- (const float3 &a, const float &c)
+__device__ void
+operator-= (float3 &a, const float3 &b)
{
- return make_float3(a.x-c, a.y-c, a.z-c);
+ a.x -= b.x;
+ a.y -= b.y;
+ a.z -= b.z;
}
-__device__ __host__ void operator-= (float3 &a, const float &c)
+__device__ float3
+operator+ (const float3 &a, const float &c)
{
- a.x -= c;
- a.y -= c;
- a.z -= c;
+ return make_float3(a.x+c, a.y+c, a.z+c);
}
-__device__ __host__ float3 operator- (const float &c, const float3& a)
+__device__ void
+operator+= (float3 &a, const float &c)
{
- return make_float3(c-a.x, c-a.y, c-a.z);
+ a.x += c;
+ a.y += c;
+ a.z += c;
}
-__device__ __host__ float3 operator* (const float3 &a, const float &c)
+__device__ float3
+operator+ (const float &c, const float3 &a)
{
- return make_float3(a.x*c, a.y*c, a.z*c);
+ return make_float3(c+a.x, c+a.y, c+a.z);
}
-__device__ __host__ void operator*= (float3 &a, const float &c)
+__device__ float3
+operator- (const float3 &a, const float &c)
{
- a.x *= c;
- a.y *= c;
- a.z *= c;
+ return make_float3(a.x-c, a.y-c, a.z-c);
}
-__device__ __host__ float3 operator* (const float &c, const float3& a)
+__device__ void
+operator-= (float3 &a, const float &c)
{
- return make_float3(c*a.x, c*a.y, c*a.z);
+ a.x -= c;
+ a.y -= c;
+ a.z -= c;
}
-__device__ __host__ float3 operator/ (const float3 &a, const float &c)
+__device__ float3
+operator- (const float &c, const float3& a)
{
- return make_float3(a.x/c, a.y/c, a.z/c);
+ return make_float3(c-a.x, c-a.y, c-a.z);
}
-__device__ __host__ void operator/= (float3 &a, const float &c)
+__device__ float3
+operator* (const float3 &a, const float &c)
{
- a.x /= c;
- a.y /= c;
- a.z /= c;
+ return make_float3(a.x*c, a.y*c, a.z*c);
}
-__device__ __host__ float3 operator/ (const float &c, const float3 &a)
+__device__ void
+operator*= (float3 &a, const float &c)
{
- return make_float3(c/a.x, c/a.y, c/a.z);
+ a.x *= c;
+ a.y *= c;
+ a.z *= c;
}
-__device__ __host__ float dot(const float3 &a, const float3 &b)
+__device__ float3
+operator* (const float &c, const float3& a)
{
- return a.x*b.x + a.y*b.y + a.z*b.z;
+ return make_float3(c*a.x, c*a.y, c*a.z);
}
-__device__ __host__ float3 cross(const float3 &a, const float3 &b)
+__device__ float3
+operator/ (const float3 &a, const float &c)
{
- return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
+ return make_float3(a.x/c, a.y/c, a.z/c);
}
-__device__ __host__ float norm(const float3 &a)
+__device__ void
+operator/= (float3 &a, const float &c)
{
- return sqrtf(dot(a,a));
+ a.x /= c;
+ a.y /= c;
+ a.z /= c;
}
-__device__ __host__ float3 normalize(const float3 &a)
+__device__ float3
+operator/ (const float &c, const float3 &a)
{
- return a/norm(a);
+ return make_float3(c/a.x, c/a.y, c/a.z);
+}
+
+__device__ float
+dot(const float3 &a, const float3 &b)
+{
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+__device__ float3
+cross(const float3 &a, const float3 &b)
+{
+ return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
+}
+
+__device__ float
+norm(const float3 &a)
+{
+ return sqrtf(dot(a,a));
+}
+
+__device__ float3
+normalize(const float3 &a)
+{
+ return a/norm(a);
}
#endif
diff --git a/src/materials.h b/src/materials.h
deleted file mode 100644
index 2355d24..0000000
--- a/src/materials.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef __MATERIALS_H__
-#define __MATERIALS_H__
-
-__device__ float min_wavelength;
-__device__ float max_wavelength;
-__device__ float wavelength_step;
-__device__ unsigned int wavelength_size;
-
-struct Material
-{
- float *refractive_index;
- float *absorption_length;
- float *scattering_length;
-};
-
-struct Surface
-{
- float *detect;
- float *absorb;
- float *reflect_diffuse;
- float *reflect_specular;
-};
-
-__device__ Material materials[20];
-__device__ Surface surfaces[20];
-
-__device__ float interp_property(const float &x, const float *fp)
-{
- if (x < min_wavelength)
- return fp[0];
-
- if (x > max_wavelength)
- return fp[wavelength_size-1];
-
- int jl = (x-min_wavelength)/wavelength_step;
-
- return fp[jl] + (x-(min_wavelength + jl*wavelength_step))*(fp[jl+1]-fp[jl])/wavelength_step;
-}
-
-extern "C"
-{
-
-__global__ void set_wavelength_range(float _min_wavelength, float _max_wavelength, float _wavelength_step, unsigned int _wavelength_size)
-{
- min_wavelength = _min_wavelength;
- max_wavelength = _max_wavelength;
- wavelength_step = _wavelength_step;
- wavelength_size = _wavelength_size;
-}
-
-__global__ void set_material(int material_index, float *refractive_index, float *absorption_length, float *scattering_length)
-{
- materials[material_index].refractive_index = refractive_index;
- materials[material_index].absorption_length = absorption_length;
- materials[material_index].scattering_length = scattering_length;
-}
-
-__global__ void set_surface(int surface_index, float *detect, float *absorb, float *reflect_diffuse, float *reflect_specular)
-{
- surfaces[surface_index].detect = detect;
- surfaces[surface_index].absorb = absorb;
- surfaces[surface_index].reflect_diffuse = reflect_diffuse;
- surfaces[surface_index].reflect_specular = reflect_specular;
-}
-
-} // extern "c"
-
-#endif
diff --git a/src/matrix.h b/src/matrix.h
index a3e480b..0a66e58 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -3,221 +3,247 @@
struct Matrix
{
- float a00, a01, a02, a10, a11, a12, a20, a21, a22;
+ float a00, a01, a02, a10, a11, a12, a20, a21, a22;
};
-__device__ __host__ Matrix make_matrix(float a00, float a01, float a02,
- float a10, float a11, float a12,
- float a20, float a21, float a22)
+__device__ Matrix
+make_matrix(float a00, float a01, float a02,
+ float a10, float a11, float a12,
+ float a20, float a21, float a22)
{
- Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22};
- return m;
+ Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22};
+ return m;
}
-__device__ __host__ Matrix make_matrix(const float3 &u1, const float3 &u2, const float3 &u3)
+__device__ Matrix
+make_matrix(const float3 &u1, const float3 &u2, const float3 &u3)
{
- Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z};
- return m;
+ Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z};
+ return m;
}
-__device__ __host__ Matrix operator- (const Matrix &m)
+__device__ Matrix
+operator- (const Matrix &m)
{
- return make_matrix(-m.a00, -m.a01, -m.a02,
- -m.a10, -m.a11, -m.a12,
- -m.a20, -m.a21, -m.a22);
+ return make_matrix(-m.a00, -m.a01, -m.a02,
+ -m.a10, -m.a11, -m.a12,
+ -m.a20, -m.a21, -m.a22);
}
-__device__ __host__ float3 operator* (const Matrix &m, const float3 &a)
+__device__ float3
+operator* (const Matrix &m, const float3 &a)
{
- return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z,
- m.a10*a.x + m.a11*a.y + m.a12*a.z,
- m.a20*a.x + m.a21*a.y + m.a22*a.z);
+ return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z,
+ m.a10*a.x + m.a11*a.y + m.a12*a.z,
+ m.a20*a.x + m.a21*a.y + m.a22*a.z);
}
-__device__ __host__ Matrix operator+ (const Matrix &m, const Matrix &n)
+__device__ Matrix
+operator+ (const Matrix &m, const Matrix &n)
{
- return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02,
- m.a10+n.a10, m.a11+n.a11, m.a12+n.a12,
- m.a20+n.a20, m.a21+n.a21, m.a22+n.a22);
+ return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02,
+ m.a10+n.a10, m.a11+n.a11, m.a12+n.a12,
+ m.a20+n.a20, m.a21+n.a21, m.a22+n.a22);
}
-__device__ __host__ void operator+= (Matrix &m, const Matrix &n)
+__device__ void
+operator+= (Matrix &m, const Matrix &n)
{
- m.a00 += n.a00;
- m.a01 += n.a01;
- m.a02 += n.a02;
- m.a10 += n.a10;
- m.a11 += n.a11;
- m.a12 += n.a12;
- m.a20 += n.a20;
- m.a21 += n.a21;
- m.a22 += n.a22;
+ m.a00 += n.a00;
+ m.a01 += n.a01;
+ m.a02 += n.a02;
+ m.a10 += n.a10;
+ m.a11 += n.a11;
+ m.a12 += n.a12;
+ m.a20 += n.a20;
+ m.a21 += n.a21;
+ m.a22 += n.a22;
}
-__device__ __host__ Matrix operator- (const Matrix &m, const Matrix &n)
+__device__ Matrix
+operator- (const Matrix &m, const Matrix &n)
{
- return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02,
- m.a10-n.a10, m.a11-n.a11, m.a12-n.a12,
- m.a20-n.a20, m.a21-n.a21, m.a22-n.a22);
+ return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02,
+ m.a10-n.a10, m.a11-n.a11, m.a12-n.a12,
+ m.a20-n.a20, m.a21-n.a21, m.a22-n.a22);
}
-__device__ __host__ void operator-= (Matrix &m, const Matrix& n)
+__device__ void
+operator-= (Matrix &m, const Matrix& n)
{
- m.a00 -= n.a00;
- m.a01 -= n.a01;
- m.a02 -= n.a02;
- m.a10 -= n.a10;
- m.a11 -= n.a11;
- m.a12 -= n.a12;
- m.a20 -= n.a20;
- m.a21 -= n.a21;
- m.a22 -= n.a22;
+ m.a00 -= n.a00;
+ m.a01 -= n.a01;
+ m.a02 -= n.a02;
+ m.a10 -= n.a10;
+ m.a11 -= n.a11;
+ m.a12 -= n.a12;
+ m.a20 -= n.a20;
+ m.a21 -= n.a21;
+ m.a22 -= n.a22;
}
-__device__ __host__ Matrix operator* (const Matrix &m, const Matrix &n)
+__device__ Matrix
+operator* (const Matrix &m, const Matrix &n)
{
- return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20,
- m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21,
- m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22,
- m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20,
- m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21,
- m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22,
- m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20,
- m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21,
- m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22);
+ return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20,
+ m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21,
+ m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22,
+ m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20,
+ m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21,
+ m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22,
+ m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20,
+ m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21,
+ m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22);
}
-__device__ __host__ Matrix operator+ (const Matrix &m, const float &c)
+__device__ Matrix
+operator+ (const Matrix &m, const float &c)
{
- return make_matrix(m.a00+c, m.a01+c, m.a02+c,
- m.a10+c, m.a11+c, m.a12+c,
- m.a20+c, m.a21+c, m.a22+c);
+ return make_matrix(m.a00+c, m.a01+c, m.a02+c,
+ m.a10+c, m.a11+c, m.a12+c,
+ m.a20+c, m.a21+c, m.a22+c);
}
-__device__ __host__ void operator+= (Matrix &m, const float &c)
+__device__ void
+operator+= (Matrix &m, const float &c)
{
- m.a00 += c;
- m.a01 += c;
- m.a02 += c;
- m.a10 += c;
- m.a11 += c;
- m.a12 += c;
- m.a20 += c;
- m.a21 += c;
- m.a22 += c;
+ m.a00 += c;
+ m.a01 += c;
+ m.a02 += c;
+ m.a10 += c;
+ m.a11 += c;
+ m.a12 += c;
+ m.a20 += c;
+ m.a21 += c;
+ m.a22 += c;
}
-__device__ __host__ Matrix operator+ (const float &c, const Matrix &m)
+__device__ Matrix
+operator+ (const float &c, const Matrix &m)
{
- return make_matrix(c+m.a00, c+m.a01, c+m.a02,
- c+m.a10, c+m.a11, c+m.a12,
- c+m.a20, c+m.a21, c+m.a22);
+ return make_matrix(c+m.a00, c+m.a01, c+m.a02,
+ c+m.a10, c+m.a11, c+m.a12,
+ c+m.a20, c+m.a21, c+m.a22);
}
-__device__ __host__ Matrix operator- (const Matrix &m, const float &c)
+__device__ Matrix
+operator- (const Matrix &m, const float &c)
{
- return make_matrix(m.a00-c, m.a01-c, m.a02-c,
- m.a10-c, m.a11-c, m.a12-c,
- m.a20-c, m.a21-c, m.a22-c);
+ return make_matrix(m.a00-c, m.a01-c, m.a02-c,
+ m.a10-c, m.a11-c, m.a12-c,
+ m.a20-c, m.a21-c, m.a22-c);
}
-__device__ __host__ void operator-= (Matrix &m, const float &c)
+__device__ void
+operator-= (Matrix &m, const float &c)
{
- m.a00 -= c;
- m.a01 -= c;
- m.a02 -= c;
- m.a10 -= c;
- m.a11 -= c;
- m.a12 -= c;
- m.a20 -= c;
- m.a21 -= c;
- m.a22 -= c;
+ m.a00 -= c;
+ m.a01 -= c;
+ m.a02 -= c;
+ m.a10 -= c;
+ m.a11 -= c;
+ m.a12 -= c;
+ m.a20 -= c;
+ m.a21 -= c;
+ m.a22 -= c;
}
-__device__ __host__ Matrix operator- (const float &c, const Matrix &m)
+__device__ Matrix
+operator- (const float &c, const Matrix &m)
{
- return make_matrix(c-m.a00, c-m.a01, c-m.a02,
- c-m.a10, c-m.a11, c-m.a12,
- c-m.a20, c-m.a21, c-m.a22);
+ return make_matrix(c-m.a00, c-m.a01, c-m.a02,
+ c-m.a10, c-m.a11, c-m.a12,
+ c-m.a20, c-m.a21, c-m.a22);
}
-__device__ __host__ Matrix operator* (const Matrix &m, const float &c)
+__device__ Matrix
+operator* (const Matrix &m, const float &c)
{
- return make_matrix(m.a00*c, m.a01*c, m.a02*c,
- m.a10*c, m.a11*c, m.a12*c,
- m.a20*c, m.a21*c, m.a22*c);
+ return make_matrix(m.a00*c, m.a01*c, m.a02*c,
+ m.a10*c, m.a11*c, m.a12*c,
+ m.a20*c, m.a21*c, m.a22*c);
}
-__device__ __host__ void operator*= (Matrix &m, const float &c)
+__device__ void
+operator*= (Matrix &m, const float &c)
{
- m.a00 *= c;
- m.a01 *= c;
- m.a02 *= c;
- m.a10 *= c;
- m.a11 *= c;
- m.a12 *= c;
- m.a20 *= c;
- m.a21 *= c;
- m.a22 *= c;
+ m.a00 *= c;
+ m.a01 *= c;
+ m.a02 *= c;
+ m.a10 *= c;
+ m.a11 *= c;
+ m.a12 *= c;
+ m.a20 *= c;
+ m.a21 *= c;
+ m.a22 *= c;
}
-__device__ __host__ Matrix operator* (const float &c, const Matrix &m)
+__device__ Matrix
+operator* (const float &c, const Matrix &m)
{
- return make_matrix(c*m.a00, c*m.a01, c*m.a02,
- c*m.a10, c*m.a11, c*m.a12,
- c*m.a20, c*m.a21, c*m.a22);
+ return make_matrix(c*m.a00, c*m.a01, c*m.a02,
+ c*m.a10, c*m.a11, c*m.a12,
+ c*m.a20, c*m.a21, c*m.a22);
}
-__device__ __host__ Matrix operator/ (const Matrix &m, const float &c)
+__device__ Matrix
+operator/ (const Matrix &m, const float &c)
{
- return make_matrix(m.a00/c, m.a01/c, m.a02/c,
- m.a10/c, m.a11/c, m.a12/c,
- m.a20/c, m.a21/c, m.a22/c);
+ return make_matrix(m.a00/c, m.a01/c, m.a02/c,
+ m.a10/c, m.a11/c, m.a12/c,
+ m.a20/c, m.a21/c, m.a22/c);
}
-__device__ __host__ void operator/= (Matrix &m, const float &c)
+__device__ void
+operator/= (Matrix &m, const float &c)
{
- m.a00 /= c;
- m.a01 /= c;
- m.a02 /= c;
- m.a10 /= c;
- m.a11 /= c;
- m.a12 /= c;
- m.a20 /= c;
- m.a21 /= c;
- m.a22 /= c;
+ m.a00 /= c;
+ m.a01 /= c;
+ m.a02 /= c;
+ m.a10 /= c;
+ m.a11 /= c;
+ m.a12 /= c;
+ m.a20 /= c;
+ m.a21 /= c;
+ m.a22 /= c;
}
-__device__ __host__ Matrix operator/ (const float &c, const Matrix &m)
+__device__ Matrix
+operator/ (const float &c, const Matrix &m)
{
- return make_matrix(c/m.a00, c/m.a01, c/m.a02,
- c/m.a10, c/m.a11, c/m.a12,
- c/m.a20, c/m.a21, c/m.a22);
+ return make_matrix(c/m.a00, c/m.a01, c/m.a02,
+ c/m.a10, c/m.a11, c/m.a12,
+ c/m.a20, c/m.a21, c/m.a22);
}
-__device__ __host__ float det(const Matrix &m)
+__device__ float
+det(const Matrix &m)
{
- return m.a00*(m.a11*m.a22 - m.a12*m.a21) - m.a10*(m.a01*m.a22 - m.a02*m.a21) + m.a20*(m.a01*m.a12 - m.a02*m.a11);
+ return m.a00*(m.a11*m.a22 - m.a12*m.a21) -
+ m.a10*(m.a01*m.a22 - m.a02*m.a21) +
+ m.a20*(m.a01*m.a12 - m.a02*m.a11);
}
-__device__ __host__ Matrix inv(const Matrix &m)
+__device__ Matrix
+inv(const Matrix &m)
{
- return make_matrix(m.a11*m.a22 - m.a12*m.a21,
- m.a02*m.a21 - m.a01*m.a22,
- m.a01*m.a12 - m.a02*m.a11,
- m.a12*m.a20 - m.a10*m.a22,
- m.a00*m.a22 - m.a02*m.a20,
- m.a02*m.a10 - m.a00*m.a12,
- m.a10*m.a21 - m.a11*m.a20,
- m.a01*m.a20 - m.a00*m.a21,
- m.a00*m.a11 - m.a01*m.a10)/det(m);
+ return make_matrix(m.a11*m.a22 - m.a12*m.a21,
+ m.a02*m.a21 - m.a01*m.a22,
+ m.a01*m.a12 - m.a02*m.a11,
+ m.a12*m.a20 - m.a10*m.a22,
+ m.a00*m.a22 - m.a02*m.a20,
+ m.a02*m.a10 - m.a00*m.a12,
+ m.a10*m.a21 - m.a11*m.a20,
+ m.a01*m.a20 - m.a00*m.a21,
+ m.a00*m.a11 - m.a01*m.a10)/det(m);
}
-__device__ __host__ Matrix outer(const float3 &a, const float3 &b)
+__device__ Matrix
+outer(const float3 &a, const float3 &b)
{
- return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z,
- a.y*b.x, a.y*b.y, a.y*b.z,
- a.z*b.x, a.z*b.y, a.z*b.z);
+ return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z,
+ a.y*b.x, a.y*b.y, a.y*b.z,
+ a.z*b.x, a.z*b.y, a.z*b.z);
}
#endif
diff --git a/src/mesh.h b/src/mesh.h
index f466470..0622144 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -2,187 +2,164 @@
#define __MESH_H__
#include "intersect.h"
+#include "geometry.h"
#define STACK_SIZE 500
-/* flattened triangle mesh */
-__device__ float3 *g_vertices;
-__device__ uint4 *g_triangles;
-__device__ unsigned int *g_colors;
-__device__ unsigned int g_start_node;
-__device__ unsigned int g_first_node;
-
-/* lower/upper bounds for the bounding box associated with each node/leaf */
-__device__ float3 *g_lower_bounds;
-__device__ float3 *g_upper_bounds;
-
-/* map to child node/triangle indices */
-texture<unsigned int, 1, cudaReadModeElementType> node_map;
-texture<unsigned int, 1, cudaReadModeElementType> node_map_end;
-
-__device__ float3 make_float3(const float4 &a)
+/* Tests the intersection between a ray and a node in the bounding volume
+ hierarchy. If the ray intersects the bounding volume and `min_distance`
+ is less than zero or the distance from `origin` to the intersection is
+ less than `min_distance`, return true, else return false. */
+__device__ bool
+intersect_node(const float3 &origin, const float3 &direction,
+ Geometry *geometry, const int &i, const float &min_distance)
{
- return make_float3(a.x, a.y, a.z);
-}
+ /* assigning these to local variables is faster for some reason */
+ float3 lower_bound = geometry->lower_bounds[i];
+ float3 upper_bound = geometry->upper_bounds[i];
-__device__ int convert(int c)
-{
- if (c & 0x80)
- return (0xFFFFFF00 | c);
- else
- return c;
-}
+ float distance_to_box;
-/* Test the intersection between a ray starting at `origin` traveling in the
- direction `direction` and the bounding box around node `i`. If the ray
- intersects the bounding box return true, else return false. */
-__device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i, const float &min_distance)
-{
- float3 lower_bound = g_lower_bounds[i];
- float3 upper_bound = g_upper_bounds[i];
+ if (intersect_box(origin, direction, lower_bound, upper_bound,
+ distance_to_box)) {
+ if (min_distance < 0.0f)
+ return true;
- float distance_to_box;
+ if (distance_to_box > min_distance)
+ return false;
- if (intersect_box(origin, direction, lower_bound, upper_bound, distance_to_box))
- {
- if (min_distance < 0.0f)
- return true;
+ return true;
+ }
+ else {
+ return false;
+ }
- if (distance_to_box > min_distance)
- return false;
-
- return true;
- }
- else
- {
- return false;
- }
-
-} // intersect_node
+}
-/* Find the intersection between a ray starting at `origin` traveling in the
- direction `direction` and the global mesh texture. If the ray intersects
- the texture return the index of the triangle which the ray intersected,
- else return -1. */
-__device__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1)
+/* Finds the intersection between a ray and `geometry`. If the ray does
+ intersect the mesh and the index of the intersected triangle is not equal
+ to `last_hit_triangle`, set `min_distance` to the distance from `origin` to
+ the intersection and return the index of the triangle which the ray
+ intersected, else return -1. */
+__device__ int
+intersect_mesh(const float3 &origin, const float3& direction,
+ Geometry *geometry, float &min_distance,
+ int last_hit_triangle = -1)
{
- int triangle_index = -1;
+ int triangle_index = -1;
- float distance;
- min_distance = -1.0f;
+ float distance;
+ min_distance = -1.0f;
- if (!intersect_node(origin, direction, g_start_node, min_distance))
- return -1;
+ if (!intersect_node(origin, direction, geometry, geometry->start_node,
+ min_distance))
+ return -1;
- unsigned int stack[STACK_SIZE];
+ unsigned int stack[STACK_SIZE];
- unsigned int *head = &stack[0];
- unsigned int *node = &stack[1];
- unsigned int *tail = &stack[STACK_SIZE-1];
- *node = g_start_node;
+ unsigned int *head = &stack[0];
+ unsigned int *node = &stack[1];
+ unsigned int *tail = &stack[STACK_SIZE-1];
+ *node = geometry->start_node;
- unsigned int i;
+ unsigned int i;
- do
- {
- unsigned int first_child = tex1Dfetch(node_map, *node);
- unsigned int stop = tex1Dfetch(node_map_end, *node);
+ do
+ {
+ unsigned int first_child = geometry->node_map[*node];
+ unsigned int stop = geometry->node_map_end[*node];
- while (*node >= g_first_node && stop == first_child+1)
- {
- *node = first_child;
- first_child = tex1Dfetch(node_map, *node);
- stop = tex1Dfetch(node_map_end, *node);
- }
+ while (*node >= geometry->first_node && stop == first_child+1) {
+ *node = first_child;
+ first_child = geometry->node_map[*node];
+ stop = geometry->node_map_end[*node];
+ }
- if (*node >= g_first_node)
- {
- for (i=first_child; i < stop; i++)
- {
- if (intersect_node(origin, direction, i, min_distance))
- {
- *node = i;
- node++;
- }
- }
-
- node--;
+ if (*node >= geometry->first_node) {
+ for (i=first_child; i < stop; i++) {
+ if (intersect_node(origin, direction, geometry, i,
+ min_distance)) {
+ *node = i;
+ node++;
+ }
+ }
+
+ node--;
+ }
+ else {
+ // node is a leaf
+ for (i=first_child; i < stop; i++) {
+ if (last_hit_triangle == i)
+ continue;
+
+ Triangle triangle = get_triangle(geometry, i);
+
+ if (intersect_triangle(origin, direction, triangle,
+ distance)) {
+ if (triangle_index == -1) {
+ triangle_index = i;
+ min_distance = distance;
+ continue;
+ }
+
+ if (distance < min_distance) {
+ triangle_index = i;
+ min_distance = distance;
+ }
}
- else // node is a leaf
- {
- for (i=first_child; i < stop; i++)
- {
- if (last_hit_triangle == i)
- continue;
-
- uint4 triangle_data = g_triangles[i];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- if (intersect_triangle(origin, direction, v0, v1, v2, distance))
- {
- if (triangle_index == -1)
- {
- triangle_index = i;
- min_distance = distance;
- continue;
- }
-
- if (distance < min_distance)
- {
- triangle_index = i;
- min_distance = distance;
- }
- }
- } // triangle loop
-
- node--;
-
- } // node is a leaf
-
- } // while loop
- while (node != head);
-
- return triangle_index;
+ } // triangle loop
+
+ node--;
+
+ } // node is a leaf
+
+ } // while loop
+ while (node != head);
+
+ return triangle_index;
}
extern "C"
{
-__global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node, float3 *lower_bounds, float3 *upper_bounds)
+__global__ void
+distance_to_mesh(int nthreads, float3 *positions, float3 *directions,
+ Geometry *g, float *distances)
{
- g_triangles = triangles;
- g_vertices = vertices;
- g_colors = colors;
- g_start_node = start_node;
- g_first_node = first_node;
- g_lower_bounds = lower_bounds;
- g_upper_bounds = upper_bounds;
-}
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
-__global__ void set_colors(unsigned int *colors)
-{
- g_colors = colors;
+ if (id >= nthreads)
+ return;
+
+ float3 position = positions[id];
+ float3 direction = directions[id];
+ direction /= norm(direction);
+
+ float distance;
+
+ int triangle_index = intersect_mesh(position, direction, g, distance);
+
+ if (triangle_index == -1)
+ distances[id] = 1e9;
+ else
+ distances[id] = distance;
}
-__global__ void color_solids(int first_triangle, int nthreads,
- int *solid_id_map,
- bool *solid_hit,
- unsigned int *solid_colors)
+__global__ void
+color_solids(int first_triangle, int nthreads, int *solid_id_map,
+ bool *solid_hit, unsigned int *solid_colors, Geometry *geometry)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- int triangle_id = first_triangle + id;
- int solid_id = solid_id_map[triangle_id];
- if (solid_hit[solid_id])
- g_colors[triangle_id] = solid_colors[solid_id];
+ int triangle_id = first_triangle + id;
+ int solid_id = solid_id_map[triangle_id];
+ if (solid_hit[solid_id])
+ geometry->colors[triangle_id] = solid_colors[solid_id];
}
-} // extern "c"
+} // extern "C"
#endif
diff --git a/src/photon.h b/src/photon.h
index e781864..8b7e588 100644
--- a/src/photon.h
+++ b/src/photon.h
@@ -3,309 +3,322 @@
#include "stdio.h"
#include "linalg.h"
-#include "materials.h"
#include "rotate.h"
#include "random.h"
#include "physical_constants.h"
#include "mesh.h"
+#include "geometry.h"
struct Photon
{
- float3 position;
- float3 direction;
- float3 polarization;
- float wavelength;
- float time;
+ float3 position;
+ float3 direction;
+ float3 polarization;
+ float wavelength;
+ float time;
- unsigned int history;
+ unsigned int history;
- int last_hit_triangle;
+ int last_hit_triangle;
};
struct State
{
- bool inside_to_outside;
+ bool inside_to_outside;
- float3 surface_normal;
+ float3 surface_normal;
- float refractive_index1, refractive_index2;
- float absorption_length;
- float scattering_length;
+ float refractive_index1, refractive_index2;
+ float absorption_length;
+ float scattering_length;
- int surface_index;
+ int surface_index;
- float distance_to_boundary;
+ float distance_to_boundary;
};
enum
{
- NO_HIT = 0x1 << 0,
- BULK_ABSORB = 0x1 << 1,
- SURFACE_DETECT = 0x1 << 2,
- SURFACE_ABSORB = 0x1 << 3,
- RAYLEIGH_SCATTER = 0x1 << 4,
- REFLECT_DIFFUSE = 0x1 << 5,
- REFLECT_SPECULAR = 0x1 << 6,
- NAN_ABORT = 0x1 << 31
+ NO_HIT = 0x1 << 0,
+ BULK_ABSORB = 0x1 << 1,
+ SURFACE_DETECT = 0x1 << 2,
+ SURFACE_ABSORB = 0x1 << 3,
+ RAYLEIGH_SCATTER = 0x1 << 4,
+ REFLECT_DIFFUSE = 0x1 << 5,
+ REFLECT_SPECULAR = 0x1 << 6,
+ NAN_ABORT = 0x1 << 31
}; // processes
enum {BREAK, CONTINUE, PASS}; // return value from propagate_to_boundary
-__device__ float get_theta(const float3 &a, const float3 &b)
+__device__ int
+convert(int c)
{
- return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b))));
+ if (c & 0x80)
+ return (0xFFFFFF00 | c);
+ else
+ return c;
}
-__device__ void fill_state(State &s, Photon &p)
+__device__ float
+get_theta(const float3 &a, const float3 &b)
{
- p.last_hit_triangle = intersect_mesh(p.position, p.direction, s.distance_to_boundary, p.last_hit_triangle);
-
- if (p.last_hit_triangle == -1)
- {
- p.history |= NO_HIT;
- return;
- }
-
- uint4 triangle_data = g_triangles[p.last_hit_triangle];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- int inner_material_index = convert(0xFF & (triangle_data.w >> 24));
- int outer_material_index = convert(0xFF & (triangle_data.w >> 16));
- s.surface_index = convert(0xFF & (triangle_data.w >> 8));
-
- s.surface_normal = cross(v1-v0, v2-v1);
- s.surface_normal /= norm(s.surface_normal);
-
- Material material1, material2;
- if (dot(s.surface_normal,-p.direction) > 0.0f)
- {
- // outside to inside
- material1 = materials[outer_material_index];
- material2 = materials[inner_material_index];
-
- s.inside_to_outside = false;
- }
- else
- {
- // inside to outside
- material1 = materials[inner_material_index];
- material2 = materials[outer_material_index];
- s.surface_normal = -s.surface_normal;
-
- s.inside_to_outside = true;
- }
+ return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b))));
+}
- s.refractive_index1 = interp_property(p.wavelength, material1.refractive_index);
- s.refractive_index2 = interp_property(p.wavelength, material2.refractive_index);
- s.absorption_length = interp_property(p.wavelength, material1.absorption_length);
- s.scattering_length = interp_property(p.wavelength, material1.scattering_length);
+__device__ void
+fill_state(State &s, Photon &p, Geometry *g)
+{
+ p.last_hit_triangle = intersect_mesh(p.position, p.direction, g,
+ s.distance_to_boundary,
+ p.last_hit_triangle);
+
+ if (p.last_hit_triangle == -1) {
+ p.history |= NO_HIT;
+ return;
+ }
+
+ Triangle t = get_triangle(g, p.last_hit_triangle);
+
+ unsigned int material_code = g->material_codes[p.last_hit_triangle];
+
+ int inner_material_index = convert(0xFF & (material_code >> 24));
+ int outer_material_index = convert(0xFF & (material_code >> 16));
+ s.surface_index = convert(0xFF & (material_code >> 8));
+
+ float3 v01 = t.v1 - t.v0;
+ float3 v12 = t.v2 - t.v1;
+
+ s.surface_normal = normalize(cross(v01, v12));
+
+ Material *material1, *material2;
+ if (dot(s.surface_normal,-p.direction) > 0.0f) {
+ // outside to inside
+ material1 = g->materials[outer_material_index];
+ material2 = g->materials[inner_material_index];
+
+ s.inside_to_outside = false;
+ }
+ else {
+ // inside to outside
+ material1 = g->materials[inner_material_index];
+ material2 = g->materials[outer_material_index];
+ s.surface_normal = -s.surface_normal;
+
+ s.inside_to_outside = true;
+ }
+
+ s.refractive_index1 = interp_property(material1, p.wavelength,
+ material1->refractive_index);
+ s.refractive_index2 = interp_property(material2, p.wavelength,
+ material2->refractive_index);
+ s.absorption_length = interp_property(material1, p.wavelength,
+ material1->absorption_length);
+ s.scattering_length = interp_property(material1, p.wavelength,
+ material1->scattering_length);
} // fill_state
-__device__ float3 pick_new_direction(float3 axis, float theta, float phi)
+__device__ float3
+pick_new_direction(float3 axis, float theta, float phi)
{
- // Taken from SNOMAN rayscatter.for
- float cos_theta = cosf(theta);
- float sin_theta = sinf(theta);
- float cos_phi = cosf(phi);
- float sin_phi = sinf(phi);
+ // Taken from SNOMAN rayscatter.for
+ float cos_theta = cosf(theta);
+ float sin_theta = sinf(theta);
+ float cos_phi = cosf(phi);
+ float sin_phi = sinf(phi);
- float sin_axis_theta = sqrt(1.0f - axis.z*axis.z);
- float cos_axis_phi, sin_axis_phi;
+ float sin_axis_theta = sqrt(1.0f - axis.z*axis.z);
+ float cos_axis_phi, sin_axis_phi;
- if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) {
- cos_axis_phi = 1.0f;
- sin_axis_phi = 0.0f;
- } else {
- cos_axis_phi = axis.x / sin_axis_theta;
- sin_axis_phi = axis.y / sin_axis_theta;
- }
-
- return make_float3(cos_theta*axis.x + sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi),
- cos_theta*axis.y + sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi),
- cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta);
+ if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) {
+ cos_axis_phi = 1.0f;
+ sin_axis_phi = 0.0f;
+ }
+ else {
+ cos_axis_phi = axis.x / sin_axis_theta;
+ sin_axis_phi = axis.y / sin_axis_theta;
+ }
+
+ float dirx = cos_theta*axis.x +
+ sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi);
+ float diry = cos_theta*axis.y +
+ sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi);
+ float dirz = cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta;
+
+ return make_float3(dirx, diry, dirz);
}
-__device__ void rayleigh_scatter(Photon &p, curandState &rng)
+__device__ void
+rayleigh_scatter(Photon &p, curandState &rng)
{
- float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f);
- if (cos_theta > 1.0f)
- cos_theta = 1.0f;
- else if (cos_theta < -1.0f)
- cos_theta = -1.0f;
-
- float theta = acosf(cos_theta);
- float phi = uniform(&rng, 0.0f, 2.0f * PI);
-
- p.direction = pick_new_direction(p.polarization, theta, phi);
-
- if (1.0f - fabsf(cos_theta) < 1e-6f) {
- p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi);
- } else {
- // linear combination of old polarization and new direction
- p.polarization = p.polarization - cos_theta * p.direction;
- }
-
- p.direction /= norm(p.direction);
- p.polarization /= norm(p.polarization);
+ float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f);
+ if (cos_theta > 1.0f)
+ cos_theta = 1.0f;
+ else if (cos_theta < -1.0f)
+ cos_theta = -1.0f;
+
+ float theta = acosf(cos_theta);
+ float phi = uniform(&rng, 0.0f, 2.0f * PI);
+
+ p.direction = pick_new_direction(p.polarization, theta, phi);
+
+ if (1.0f - fabsf(cos_theta) < 1e-6f) {
+ p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi);
+ }
+ else {
+ // linear combination of old polarization and new direction
+ p.polarization = p.polarization - cos_theta * p.direction;
+ }
+
+ p.direction /= norm(p.direction);
+ p.polarization /= norm(p.polarization);
} // scatter
__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng)
{
- float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng));
- float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng));
+ float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng));
+ float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng));
- if (absorption_distance <= scattering_distance)
- {
- if (absorption_distance <= s.distance_to_boundary)
- {
- p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1);
- p.position += absorption_distance*p.direction;
- p.history |= BULK_ABSORB;
+ if (absorption_distance <= scattering_distance) {
+ if (absorption_distance <= s.distance_to_boundary) {
+ p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1);
+ p.position += absorption_distance*p.direction;
+ p.history |= BULK_ABSORB;
- p.last_hit_triangle = -1;
+ p.last_hit_triangle = -1;
- return BREAK;
- } // photon is absorbed in material1
- }
- else
- {
- if (scattering_distance <= s.distance_to_boundary)
- {
- p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1);
- p.position += scattering_distance*p.direction;
+ return BREAK;
+ } // photon is absorbed in material1
+ }
+ else {
+ if (scattering_distance <= s.distance_to_boundary) {
+ p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1);
+ p.position += scattering_distance*p.direction;
- rayleigh_scatter(p, rng);
+ rayleigh_scatter(p, rng);
- p.history |= RAYLEIGH_SCATTER;
+ p.history |= RAYLEIGH_SCATTER;
- p.last_hit_triangle = -1;
+ p.last_hit_triangle = -1;
- return CONTINUE;
- } // photon is scattered in material1
- } // if scattering_distance < absorption_distance
+ return CONTINUE;
+ } // photon is scattered in material1
+ } // if scattering_distance < absorption_distance
- p.position += s.distance_to_boundary*p.direction;
- p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1);
+ p.position += s.distance_to_boundary*p.direction;
+ p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1);
- return PASS;
+ return PASS;
} // propagate_to_boundary
-__device__ void propagate_at_boundary(Photon &p, State &s, curandState &rng)
+__device__ void
+propagate_at_boundary(Photon &p, State &s, curandState &rng)
{
- float incident_angle = get_theta(s.surface_normal,-p.direction);
- float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2);
-
- float3 incident_plane_normal = cross(p.direction, s.surface_normal);
- float incident_plane_normal_length = norm(incident_plane_normal);
-
- // Photons at normal incidence do not have a unique plane of incidence,
- // so we have to pick the plane normal to be the polarization vector
- // to get the correct logic below
- if (incident_plane_normal_length < 1e-6f)
- incident_plane_normal = p.polarization;
- else
- incident_plane_normal /= incident_plane_normal_length;
-
- float normal_coefficient = dot(p.polarization, incident_plane_normal);
- float normal_probability = normal_coefficient*normal_coefficient;
-
- float reflection_coefficient;
- if (curand_uniform(&rng) < normal_probability)
- {
- reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle);
-
- if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle))
- {
- p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal);
+ float incident_angle = get_theta(s.surface_normal,-p.direction);
+ float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2);
+
+ float3 incident_plane_normal = cross(p.direction, s.surface_normal);
+ float incident_plane_normal_length = norm(incident_plane_normal);
+
+ // Photons at normal incidence do not have a unique plane of incidence,
+ // so we have to pick the plane normal to be the polarization vector
+ // to get the correct logic below
+ if (incident_plane_normal_length < 1e-6f)
+ incident_plane_normal = p.polarization;
+ else
+ incident_plane_normal /= incident_plane_normal_length;
+
+ float normal_coefficient = dot(p.polarization, incident_plane_normal);
+ float normal_probability = normal_coefficient*normal_coefficient;
+
+ float reflection_coefficient;
+ if (curand_uniform(&rng) < normal_probability) {
+ // photon polarization normal to plane of incidence
+ reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle);
+
+ if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) {
+ p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal);
- p.history |= REFLECT_SPECULAR;
- }
- else
- {
- p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal);
- }
-
- p.polarization = incident_plane_normal;
- } // photon polarization normal to plane of incidence
- else
- {
- reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle);
-
- if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle))
- {
- p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal);
-
- p.history |= REFLECT_SPECULAR;
- }
- else
- {
- p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal);
- }
-
- p.polarization = cross(incident_plane_normal, p.direction);
- p.polarization /= norm(p.polarization);
- } // photon polarization parallel to plane of incidence
-
-} // propagate_at_boundary
-
-__device__ int propagate_at_surface(Photon &p, State &s, curandState &rng)
-{
- Surface surface = surfaces[s.surface_index];
-
- float detect = interp_property(p.wavelength, surface.detect);
- float absorb = interp_property(p.wavelength, surface.absorb);
- float reflect_diffuse = interp_property(p.wavelength, surface.reflect_diffuse);
- float reflect_specular = interp_property(p.wavelength, surface.reflect_specular);
-
- // since the surface properties are interpolated linearly, we are
- // guaranteed that they still sum to 1.0.
+ p.history |= REFLECT_SPECULAR;
+ }
+ else {
+ p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal);
+ }
- float uniform_sample = curand_uniform(&rng);
+ p.polarization = incident_plane_normal;
+ }
+ else {
+ // photon polarization parallel to plane of incidence
+ reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle);
- if (uniform_sample < absorb)
- {
- p.history |= SURFACE_ABSORB;
- return BREAK;
+ if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) {
+ p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal);
+
+ p.history |= REFLECT_SPECULAR;
}
- else if (uniform_sample < absorb + detect)
- {
- p.history |= SURFACE_DETECT;
- return BREAK;
+ else {
+ p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal);
}
- else if (uniform_sample < absorb + detect + reflect_diffuse)
- {
- // diffusely reflect
- p.direction = uniform_sphere(&rng);
- if (dot(p.direction, s.surface_normal) < 0.0f)
- p.direction = -p.direction;
+ p.polarization = cross(incident_plane_normal, p.direction);
+ p.polarization /= norm(p.polarization);
+ }
- // randomize polarization?
- p.polarization = cross(uniform_sphere(&rng), p.direction);
- p.polarization /= norm(p.polarization);
+} // propagate_at_boundary
+
+__device__ int
+propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry)
+{
+ Surface *surface = geometry->surfaces[s.surface_index];
+
+ /* since the surface properties are interpolated linearly, we are
+ guaranteed that they still sum to 1.0. */
+
+ float detect = interp_property(surface, p.wavelength, surface->detect);
+ float absorb = interp_property(surface, p.wavelength, surface->absorb);
+ float reflect_diffuse = interp_property(surface, p.wavelength, surface->reflect_diffuse);
+ float reflect_specular = interp_property(surface, p.wavelength, surface->reflect_specular);
+
+ float uniform_sample = curand_uniform(&rng);
+
+ if (uniform_sample < absorb) {
+ p.history |= SURFACE_ABSORB;
+ return BREAK;
+ }
+ else if (uniform_sample < absorb + detect) {
+ p.history |= SURFACE_DETECT;
+ return BREAK;
+ }
+ else if (uniform_sample < absorb + detect + reflect_diffuse) {
+ // diffusely reflect
+ p.direction = uniform_sphere(&rng);
+
+ if (dot(p.direction, s.surface_normal) < 0.0f)
+ p.direction = -p.direction;
+
+ // randomize polarization?
+ p.polarization = cross(uniform_sphere(&rng), p.direction);
+ p.polarization /= norm(p.polarization);
- p.history |= REFLECT_DIFFUSE;
+ p.history |= REFLECT_DIFFUSE;
- return CONTINUE;
- }
- else
- {
- // specularly reflect
- float incident_angle = get_theta(s.surface_normal,-p.direction);
- float3 incident_plane_normal = cross(p.direction, s.surface_normal);
- incident_plane_normal /= norm(incident_plane_normal);
+ return CONTINUE;
+ }
+ else {
+ // specularly reflect
+ float incident_angle = get_theta(s.surface_normal,-p.direction);
+ float3 incident_plane_normal = cross(p.direction, s.surface_normal);
+ incident_plane_normal /= norm(incident_plane_normal);
- p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal);
+ p.direction = rotate(s.surface_normal, incident_angle,
+ incident_plane_normal);
- p.history |= REFLECT_SPECULAR;
+ p.history |= REFLECT_SPECULAR;
- return CONTINUE;
- }
+ return CONTINUE;
+ }
} // propagate_at_surface
diff --git a/src/propagate.cu b/src/propagate.cu
new file mode 100644
index 0000000..2d5183e
--- /dev/null
+++ b/src/propagate.cu
@@ -0,0 +1,100 @@
+//-*-c-*-
+
+#include "linalg.h"
+#include "mesh.h"
+#include "geometry.h"
+#include "photon.h"
+
+extern "C"
+{
+
+__global__ void
+propagate(int first_photon, int nthreads, unsigned int *input_queue,
+ unsigned int *output_queue, curandState *rng_states,
+ float3 *positions, float3 *directions,
+ float *wavelengths, float3 *polarizations,
+ float *times, unsigned int *histories,
+ int *last_hit_triangles, int max_steps,
+ Geometry *geometry)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ curandState rng = rng_states[id];
+
+ int photon_id = input_queue[first_photon + id];
+
+ Photon p;
+ p.position = positions[photon_id];
+ p.direction = directions[photon_id];
+ p.direction /= norm(p.direction);
+ p.polarization = polarizations[photon_id];
+ p.polarization /= norm(p.polarization);
+ p.wavelength = wavelengths[photon_id];
+ p.time = times[photon_id];
+ p.last_hit_triangle = last_hit_triangles[photon_id];
+ p.history = histories[photon_id];
+
+ if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB))
+ return;
+
+ State s;
+
+ int steps = 0;
+ while (steps < max_steps) {
+ steps++;
+
+ int command;
+
+ // check for NaN and fail
+ if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z)) {
+ p.history |= NO_HIT | NAN_ABORT;
+ break;
+ }
+
+ fill_state(s, p, geometry);
+
+ if (p.last_hit_triangle == -1)
+ break;
+
+ command = propagate_to_boundary(p, s, rng);
+
+ if (command == BREAK)
+ break;
+
+ if (command == CONTINUE)
+ continue;
+
+ if (s.surface_index != -1) {
+ command = propagate_at_surface(p, s, rng, geometry);
+
+ if (command == BREAK)
+ break;
+
+ if (command == CONTINUE)
+ continue;
+ }
+
+ propagate_at_boundary(p, s, rng);
+
+ } // while (steps < max_steps)
+
+ rng_states[id] = rng;
+ positions[photon_id] = p.position;
+ directions[photon_id] = p.direction;
+ polarizations[photon_id] = p.polarization;
+ wavelengths[photon_id] = p.wavelength;
+ times[photon_id] = p.time;
+ histories[photon_id] = p.history;
+ last_hit_triangles[photon_id] = p.last_hit_triangle;
+
+ // Not done, put photon in output queue
+ if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) {
+ int out_idx = atomicAdd(output_queue, 1);
+ output_queue[out_idx] = photon_id;
+ }
+} // propagate
+
+} // extern "C"
diff --git a/src/random.h b/src/random.h
index 6abc415..e9d2e75 100644
--- a/src/random.h
+++ b/src/random.h
@@ -5,76 +5,81 @@
#include "physical_constants.h"
-__device__ float uniform(curandState *s, const float &low, const float &high)
+__device__ float
+uniform(curandState *s, const float &low, const float &high)
{
- return low + curand_uniform(s)*(high-low);
+ return low + curand_uniform(s)*(high-low);
}
-__device__ float3 uniform_sphere(curandState *s)
+__device__ float3
+uniform_sphere(curandState *s)
{
- float theta = uniform(s, 0.0f, 2*PI);
- float u = uniform(s, -1.0f, 1.0f);
- float c = sqrtf(1.0f-u*u);
+ float theta = uniform(s, 0.0f, 2*PI);
+ float u = uniform(s, -1.0f, 1.0f);
+ float c = sqrtf(1.0f-u*u);
- return make_float3(c*cosf(theta), c*sinf(theta), u);
+ return make_float3(c*cosf(theta), c*sinf(theta), u);
}
// Draw a random sample given a cumulative distribution function
// Assumptions: ncdf >= 2, cdf_y[0] is 0.0, and cdf_y[ncdf-1] is 1.0
-__device__ float sample_cdf(curandState *rng, int ncdf,
- float *cdf_x, float *cdf_y)
+__device__ float
+sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y)
{
- float u = curand_uniform(rng);
-
- // Find u in cdf_y with binary search
- // list must contain at least 2 elements: 0.0 and 1.0
- int lower=0;
- int upper=ncdf-1;
- while(lower < upper-1) {
- int half = (lower+upper) / 2;
- if (u < cdf_y[half])
- upper = half;
- else
- lower = half;
- }
+ float u = curand_uniform(rng);
+
+ // Find u in cdf_y with binary search
+ // list must contain at least 2 elements: 0.0 and 1.0
+ int lower=0;
+ int upper=ncdf-1;
+ while(lower < upper-1) {
+ int half = (lower+upper) / 2;
+ if (u < cdf_y[half])
+ upper = half;
+ else
+ lower = half;
+ }
- float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]);
- return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac);
+ float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]);
+ return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac);
}
-
extern "C"
{
-__global__ void init_rng(int nthreads, curandState *s, unsigned long long seed, unsigned long long offset)
+__global__ void
+init_rng(int nthreads, curandState *s, unsigned long long seed,
+ unsigned long long offset)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- curand_init(seed, id, offset, &s[id]);
+ curand_init(seed, id, offset, &s[id]);
}
-__global__ void fill_uniform(int nthreads, curandState *s, float *a, float low, float high)
+__global__ void
+fill_uniform(int nthreads, curandState *s, float *a, float low, float high)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- a[id] = uniform(&s[id], low, high);
+ a[id] = uniform(&s[id], low, high);
}
-__global__ void fill_uniform_sphere(int nthreads, curandState *s, float3 *a)
+__global__ void
+fill_uniform_sphere(int nthreads, curandState *s, float3 *a)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- a[id] = uniform_sphere(&s[id]);
+ a[id] = uniform_sphere(&s[id]);
}
} // extern "c"
diff --git a/src/render.cu b/src/render.cu
new file mode 100644
index 0000000..e5b4ac5
--- /dev/null
+++ b/src/render.cu
@@ -0,0 +1,169 @@
+//-*-c-*-
+
+#include "linalg.h"
+#include "intersect.h"
+#include "mesh.h"
+#include "sorting.h"
+#include "geometry.h"
+
+#include "stdio.h"
+
+__device__ float4
+get_color(const float3 &direction, const Triangle &triangle, unsigned int rgba)
+{
+ float3 v01 = triangle.v1 - triangle.v0;
+ float3 v12 = triangle.v2 - triangle.v1;
+
+ float3 surface_normal = normalize(cross(v01,v12));
+
+ float cos_theta = dot(surface_normal,-direction);
+
+ if (cos_theta < 0.0f)
+ cos_theta = dot(-surface_normal,-direction);
+
+ unsigned int a0 = 0xff & (rgba >> 24);
+ unsigned int r0 = 0xff & (rgba >> 16);
+ unsigned int g0 = 0xff & (rgba >> 8);
+ unsigned int b0 = 0xff & rgba;
+
+ float alpha = (255 - a0)/255.0f;
+
+ return make_float4(r0*cos_theta, g0*cos_theta, b0*cos_theta, alpha);
+}
+
+extern "C"
+{
+
+__global__ void
+render(int nthreads, float3 *_origin, float3 *_direction, Geometry *geometry,
+ unsigned int alpha_depth, unsigned int *pixels, float *_dx,
+ unsigned int *dxlen, float4 *_color)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ float3 origin = _origin[id];
+ float3 direction = _direction[id];
+ unsigned int n = dxlen[id];
+
+ float distance;
+
+ if (n < 1 && !intersect_node(origin, direction, geometry,
+ geometry->start_node, -1.0f)) {
+ pixels[id] = 0;
+ return;
+ }
+
+ unsigned int stack[STACK_SIZE];
+
+ unsigned int *head = &stack[0];
+ unsigned int *node = &stack[1];
+ unsigned int *tail = &stack[STACK_SIZE-1];
+ *node = geometry->start_node;
+
+ float *dx = _dx + id*alpha_depth;
+ float4 *color_a = _color + id*alpha_depth;
+
+ unsigned int i;
+
+ do {
+ unsigned int first_child = geometry->node_map[*node];
+ unsigned int stop = geometry->node_map_end[*node];
+
+ while (*node >= geometry->first_node && stop == first_child+1) {
+ *node = first_child;
+ first_child = geometry->node_map[*node];
+ stop = geometry->node_map_end[*node];
+ }
+
+ if (*node >= geometry->first_node) {
+ for (i=first_child; i < stop; i++) {
+ if (intersect_node(origin, direction, geometry, i, -1.0f)) {
+ *node = i;
+ node++;
+ }
+ }
+
+ node--;
+ }
+ else {
+ // node is a leaf
+ for (i=first_child; i < stop; i++) {
+ Triangle triangle = get_triangle(geometry, i);
+
+ if (intersect_triangle(origin, direction, triangle,
+ distance)) {
+ if (n < 1) {
+ dx[0] = distance;
+
+ unsigned int rgba = geometry->colors[i];
+ float4 color = get_color(direction, triangle, rgba);
+
+ color_a[0] = color;
+ }
+ else {
+ unsigned long j = searchsorted(n, dx, distance);
+
+ if (j <= alpha_depth-1) {
+ insert(alpha_depth, dx, j, distance);
+
+ unsigned int rgba = geometry->colors[i];
+ float4 color = get_color(direction, triangle,
+ rgba);
+
+ insert(alpha_depth, color_a, j, color);
+ }
+ }
+
+ if (n < alpha_depth)
+ n++;
+ }
+
+ } // triangle loop
+
+ node--;
+
+ } // node is a leaf
+
+ } // while loop
+ while (node != head);
+
+ if (n < 1) {
+ pixels[id] = 0;
+ return;
+ }
+
+ dxlen[id] = n;
+
+ float scale = 1.0f;
+ float fr = 0.0f;
+ float fg = 0.0f;
+ float fb = 0.0f;
+ for (i=0; i < n; i++) {
+ float alpha;
+ if (i < alpha_depth-1)
+ alpha = color_a[i].w;
+ else
+ alpha = 1.0;
+
+ fr += scale*color_a[i].x*alpha;
+ fg += scale*color_a[i].y*alpha;
+ fb += scale*color_a[i].z*alpha;
+
+ scale *= (1.0f-alpha);
+ }
+ unsigned int a;
+ if (n < alpha_depth)
+ a = floorf(255*(1.0f-scale));
+ else
+ a = 255;
+ unsigned int r = floorf(fr);
+ unsigned int g = floorf(fg);
+ unsigned int b = floorf(fb);
+
+ pixels[id] = a << 24 | r << 16 | g << 8 | b;
+}
+
+} // extern "C"
diff --git a/src/rotate.h b/src/rotate.h
index 7fc6f7e..15f8037 100644
--- a/src/rotate.h
+++ b/src/rotate.h
@@ -6,21 +6,22 @@
__device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1};
-__device__ __host__ Matrix make_rotation_matrix(float phi, const float3 &n)
+__device__ Matrix
+make_rotation_matrix(float phi, const float3 &n)
{
- /* rotate points counterclockwise, when looking towards +infinity,
- through an angle `phi` about the axis `n`. */
+ float cos_phi = cosf(phi);
+ float sin_phi = sinf(phi);
- float cos_phi = cosf(phi);
- float sin_phi = sinf(phi);
-
- return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) +
- sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0);
+ return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) +
+ sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0);
}
-__device__ __host__ float3 rotate(const float3 &a, float phi, const float3 &n)
+/* rotate points counterclockwise, when looking towards +infinity,
+ through an angle `phi` about the axis `n`. */
+__device__ float3
+rotate(const float3 &a, float phi, const float3 &n)
{
- return make_rotation_matrix(phi,n)*a;
+ return make_rotation_matrix(phi,n)*a;
}
#endif
diff --git a/src/sorting.h b/src/sorting.h
index ec16bbe..03eabc6 100644
--- a/src/sorting.h
+++ b/src/sorting.h
@@ -1,103 +1,105 @@
template <class T>
-__device__ void swap(T &a, T &b)
+__device__ void
+swap(T &a, T &b)
{
- T tmp = a;
- a = b;
- b = tmp;
+ T tmp = a;
+ a = b;
+ b = tmp;
}
template <class T>
-__device__ void reverse(int n, T *a)
+__device__ void
+reverse(int n, T *a)
{
- for (int i=0; i < n/2; i++)
- swap(a[i],a[n-1-i]);
+ for (int i=0; i < n/2; i++)
+ swap(a[i],a[n-1-i]);
}
template <class T>
-__device__ void piksrt(int n, T *arr)
+__device__ void
+piksrt(int n, T *arr)
{
- int i,j;
- T a;
+ int i,j;
+ T a;
- for (j=1; j < n; j++)
- {
- a = arr[j];
- i = j-1;
- while (i >= 0 && arr[i] > a)
- {
- arr[i+1] = arr[i];
- i--;
- }
- arr[i+1] = a;
+ for (j=1; j < n; j++) {
+ a = arr[j];
+ i = j-1;
+ while (i >= 0 && arr[i] > a) {
+ arr[i+1] = arr[i];
+ i--;
}
+ arr[i+1] = a;
+ }
}
template <class T, class U>
-__device__ void piksrt2(int n, T *arr, U *brr)
+__device__ void
+piksrt2(int n, T *arr, U *brr)
{
- int i,j;
- T a;
- U b;
+ int i,j;
+ T a;
+ U b;
- for (j=1; j < n; j++)
- {
- a = arr[j];
- b = brr[j];
- i = j-1;
- while (i >= 0 && arr[i] > a)
- {
- arr[i+1] = arr[i];
- brr[i+1] = brr[i];
- i--;
- }
- arr[i+1] = a;
- brr[i+1] = b;
+ for (j=1; j < n; j++) {
+ a = arr[j];
+ b = brr[j];
+ i = j-1;
+ while (i >= 0 && arr[i] > a) {
+ arr[i+1] = arr[i];
+ brr[i+1] = brr[i];
+ i--;
}
+ arr[i+1] = a;
+ brr[i+1] = b;
+ }
}
template <class T>
-__device__ unsigned long searchsorted(unsigned long n, T *arr, const T &x)
+__device__ unsigned long
+searchsorted(unsigned long n, T *arr, const T &x)
{
- unsigned long ju,jm,jl;
- int ascnd;
+ unsigned long ju,jm,jl;
+ int ascnd;
- jl = 0;
- ju = n;
+ jl = 0;
+ ju = n;
- ascnd = (arr[n-1] > arr[0]);
+ ascnd = (arr[n-1] > arr[0]);
- while (ju-jl > 1)
- {
- jm = (ju+jl) >> 1;
+ while (ju-jl > 1) {
+ jm = (ju+jl) >> 1;
- if (x >= arr[jm] == ascnd)
- jl = jm;
- else
- ju = jm;
- }
-
- if (x <= arr[0])
- return 0;
- else if (x == arr[n-1])
- return n-1;
+ if (x >= arr[jm] == ascnd)
+ jl = jm;
else
- return ju;
+ ju = jm;
+ }
+
+ if (x <= arr[0])
+ return 0;
+ else if (x == arr[n-1])
+ return n-1;
+ else
+ return ju;
}
template <class T>
-__device__ void insert(unsigned long n, T *arr, unsigned long i, const T &x)
+__device__ void
+insert(unsigned long n, T *arr, unsigned long i, const T &x)
{
- unsigned long j;
- for (j=n-1; j > i; j--)
- arr[j] = arr[j-1];
- arr[i] = x;
+ unsigned long j;
+ for (j=n-1; j > i; j--)
+ arr[j] = arr[j-1];
+ arr[i] = x;
}
template <class T>
-__device__ void add_sorted(unsigned long n, T *arr, const T &x)
+__device__ void
+add_sorted(unsigned long n, T *arr, const T &x)
{
- unsigned long i = searchsorted(n, arr, x);
+ unsigned long i = searchsorted(n, arr, x);
- if (i < n)
- insert(n, arr, i, x);
+ if (i < n)
+ insert(n, arr, i, x);
}
diff --git a/src/tools.cu b/src/tools.cu
index 3d3fed7..80d06ec 100644
--- a/src/tools.cu
+++ b/src/tools.cu
@@ -3,15 +3,19 @@
extern "C"
{
-__global__ void interleave(int nthreads, unsigned long long *x, unsigned long long *y, unsigned long long *z, int bits, unsigned long long *dest)
+__global__ void
+interleave(int nthreads, unsigned long long *x, unsigned long long *y,
+ unsigned long long *z, int bits, unsigned long long *dest)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- for (int i=0; i < bits; i++)
- dest[id] |= (x[id] & 1 << i) << (2*i) | (y[id] & 1 << i) << (2*i+1) | (z[id] & 1 << i) << (2*i+2);
+ for (int i=0; i < bits; i++)
+ dest[id] |= (x[id] & 1 << i) << (2*i) |
+ (y[id] & 1 << i) << (2*i+1) |
+ (z[id] & 1 << i) << (2*i+2);
}
-}
+} // extern "C"
diff --git a/src/transform.cu b/src/transform.cu
index 57bd509..1f4405e 100644
--- a/src/transform.cu
+++ b/src/transform.cu
@@ -7,41 +7,45 @@ extern "C"
{
/* Translate the points `a` by the vector `v` */
-__global__ void translate(int nthreads, float3 *a, float3 v)
+__global__ void
+translate(int nthreads, float3 *a, float3 v)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- a[id] += v;
+ a[id] += v;
}
/* Rotate the points `a` through an angle `phi` counter-clockwise about the
axis `axis` (when looking towards +infinity). */
-__global__ void rotate(int nthreads, float3 *a, float phi, float3 axis)
+__global__ void
+rotate(int nthreads, float3 *a, float phi, float3 axis)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- a[id] = rotate(a[id], phi, axis);
+ a[id] = rotate(a[id], phi, axis);
}
/* Rotate the points `a` through an angle `phi` counter-clockwise
(when looking towards +infinity along `axis`) about the axis defined
by the point `point` and the vector `axis` . */
-__global__ void rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, float3 point)
+__global__ void
+rotate_around_point(int nthreads, float3 *a, float phi, float3 axis,
+ float3 point)
{
- int id = blockIdx.x*blockDim.x + threadIdx.x;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
- if (id >= nthreads)
- return;
+ if (id >= nthreads)
+ return;
- a[id] -= point;
- a[id] = rotate(a[id], phi, axis);
- a[id] += point;
+ a[id] -= point;
+ a[id] = rotate(a[id], phi, axis);
+ a[id] += point;
}
} // extern "c"