-rw-r--r--   README                     4
-rw-r--r--   src/Makefile               7
-rw-r--r--   src/fit.c                194
-rw-r--r--   src/hdf5_utils.c         193
-rw-r--r--   src/hdf5_utils.h          85
-rw-r--r--   src/zdab-cat.c           117
-rw-r--r--   src/zdab_utils.c          18
-rw-r--r--   src/zdab_utils.h          32
-rw-r--r--   utils/Makefile             1
-rwxr-xr-x   utils/cat-grid-jobs      163
-rw-r--r--   utils/fit-wrapper          7
-rwxr-xr-x   utils/plot               171
-rwxr-xr-x   utils/plot-energy        294
-rwxr-xr-x   utils/plot-fit-results   297
-rwxr-xr-x   utils/submit-grid-jobs   152
15 files changed, 1063 insertions, 672 deletions
diff --git a/README b/README
index e1b6f60..ce5dbc9 100644
--- a/README
+++ b/README
@@ -1,10 +1,10 @@
Building
========
-First you have to install the gsl and nlopt libraries. On Fedora you can
+First you have to install the gsl, nlopt, and hdf5 libraries. On Fedora you can
install these with the following command:
- $ yum install gsl gsl-devel NLopt NLopt-devel
+ $ yum install gsl gsl-devel NLopt NLopt-devel hdf5 hdf5-devel
You can also install these packages yourself. For instructions, see below
(Installing GSL and Installing NLopt).
diff --git a/src/Makefile b/src/Makefile
index 99cff5e..e5ca1cd 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,7 +1,7 @@
release_hdr := $(shell sh -c './mkreleasehdr.sh')
CFLAGS=-fdiagnostics-color -O2 -Wall -g -DSWAP_BYTES
-LDLIBS=-fdiagnostics-color -lm -lgsl -lgslcblas -lnlopt_cxx -lstdc++ -lz
+LDLIBS=-fdiagnostics-color -lm -lgsl -lgslcblas -lnlopt_cxx -lstdc++ -lz -lhdf5
PREFIX?=$(HOME)/local
INSTALL_BIN=$(PREFIX)/bin
@@ -32,17 +32,18 @@ test-time-pdf: test-time-pdf.o zebra.o likelihood.o pmt.o vector.o misc.o muon.o
test-zebra: test-zebra.o zebra.o pack2b.o
-fit: fit.o zebra.o likelihood.o pmt.o vector.o misc.o muon.o optics.o quantum_efficiency.o solid_angle.o pdg.o scattering.o zdab_utils.o pack2b.o sno_charge.o db.o dqxx.o dict.o siphash.o path.o pmt_response.o release.o electron.o proton.o find_peaks.o quad.o dc.o sort.o util.o
+fit: fit.o zebra.o likelihood.o pmt.o vector.o misc.o muon.o optics.o quantum_efficiency.o solid_angle.o pdg.o scattering.o zdab_utils.o pack2b.o sno_charge.o db.o dqxx.o dict.o siphash.o path.o pmt_response.o release.o electron.o proton.o find_peaks.o quad.o dc.o sort.o util.o hdf5_utils.o
test-find-peaks: test-find-peaks.o zebra.o likelihood.o pmt.o vector.o misc.o muon.o optics.o quantum_efficiency.o solid_angle.o pdg.o scattering.o zdab_utils.o pack2b.o sno_charge.o db.o dqxx.o dict.o siphash.o path.o pmt_response.o release.o electron.o proton.o find_peaks.o util.o quad.o
calculate-csda-range: calculate-csda-range.o
-zdab-cat: zdab-cat.o zebra.o pmt.o vector.o misc.o zdab_utils.o pack2b.o db.o dqxx.o dict.o siphash.o release.o dc.o sort.o util.o
+zdab-cat: zdab-cat.o zebra.o pmt.o vector.o misc.o zdab_utils.o pack2b.o db.o dqxx.o dict.o siphash.o release.o dc.o sort.o util.o hdf5_utils.o
install:
@mkdir -p $(INSTALL_BIN)
$(INSTALL) fit $(INSTALL_BIN)
+ $(INSTALL) zdab-cat $(INSTALL_BIN)
clean:
rm -f *.o calculate_limits test test-vector test-likelihood fit test-charge test-path calculate-csda-range test-time-pdf test-zebra test-find-peaks Makefile.dep zdab-cat
diff --git a/src/fit.c b/src/fit.c
index adc6ebc..0360a8f 100644
--- a/src/fit.c
+++ b/src/fit.c
@@ -47,12 +47,21 @@
#include "electron.h"
#include "muon.h"
#include "proton.h"
+#include "hdf5.h"
+#include "hdf5_utils.h"
/* Maximum number of fit parameters. Should be at least 4 + 3*MAX_VERTICES. */
#define MAX_PARS 100
+
/* Maximum number of peaks to search for in Hough transform. */
#define MAX_NPEAKS 10
+/* Maximum number of events, primary seed particle tracks, and fits.
+ *
+ * FIXME: I should write out the events, tracks, and fits as they come up
+ * instead of storing them until the end. */
+#define MAX_NEVENTS 1000000
+
char *GitSHA1(void);
char *GitDirty(void);
@@ -5869,12 +5878,13 @@ int main(int argc, char **argv)
{
int i, j, k;
zebraFile *f;
- zebraBank bmast, bmc, bmcgn, mctk, b;
+ zebraBank bmast, mc, bmcgn, mctk, b;
int rv;
EVBank bev;
FTPVBank bftpv;
FTXKBank bftxk;
RSPBank bftxr;
+ MCBank bmc;
MCTKBank bmctk;
MCVXBank bmcvx;
event ev = {0};
@@ -5882,7 +5892,6 @@ int main(int argc, char **argv)
double xopt[MAX_PARS];
char *filename = NULL;
char *output = NULL;
- FILE *fout = NULL;
int skip_second_event = 0;
struct timeval tv_start, tv_stop;
long long elapsed;
@@ -5892,12 +5901,29 @@ int main(int argc, char **argv)
int id[MAX_VERTICES];
size_t combos[100];
size_t len;
- char tmp[256];
- size_t nhit, min_nhit = 100;
+ size_t min_nhit = 100;
int last_run;
char dqxx_file[256];
int32_t gtid = -1;
+ int nevents = 0;
+ /* Array of events to write out to HDF5 file.
+ *
+ * Note: Declared static since otherwise it will cause a stack overflow. */
+ static HDF5Event hdf5_events[MAX_NEVENTS];
+
+ int nmcgn = 0;
+ /* Array of primary seed tracks to write out to HDF5 file.
+ *
+ * Note: Declared static since otherwise it will cause a stack overflow. */
+ static HDF5MCGN hdf5_mcgn[MAX_NEVENTS];
+
+ int nfits = 0;
+ /* Array of fit results to write out to HDF5 file.
+ *
+ * Note: Declared static since otherwise it will cause a stack overflow. */
+ static HDF5Fit hdf5_fits[MAX_NEVENTS];
+
for (i = 1; i < argc; i++) {
if (strlen(argv[i]) >= 2 && !strncmp(argv[i], "--", 2)) {
if (!strcmp(argv[i]+2,"skip-second-event")) {
@@ -5953,22 +5979,8 @@ int main(int argc, char **argv)
return 1;
}
- if (output) {
- fout = fopen(output, "w");
-
- if (!fout) {
- fprintf(stderr, "failed to open '%s': %s\n", output, strerror(errno));
- return 1;
- }
-
- fprintf(fout, "git_sha1: %s\n", GitSHA1());
- fprintf(fout, "git_dirty: %s\n", GitDirty());
- fprintf(fout, "data:\n");
- }
-
if (load_pmt_info()) {
zebra_close(f);
- if (output) fclose(fout);
return 1;
}
@@ -6038,30 +6050,29 @@ int main(int argc, char **argv)
continue;
}
- if (fout) fprintf(fout, " -\n");
-
if (bmast.links[KMAST_MC-1] == 0) goto skip_mc;
- rv = zebra_get_bank(f,&bmc,bmast.links[KMAST_MC-1]);
+ rv = zebra_get_bank(f,&mc,bmast.links[KMAST_MC-1]);
if (rv) {
fprintf(stderr, "error getting MC bank: %s\n", zebra_err);
goto err;
}
+ unpack_mc(mc.data, &bmc);
+
if (bmast.links[KMC_MCGN-1] == 0) {
fprintf(stderr, "MCGN link is zero!\n");
goto err;
}
- rv = zebra_get_bank(f,&bmcgn,bmc.links[KMC_MCGN-1]);
+ rv = zebra_get_bank(f,&bmcgn,mc.links[KMC_MCGN-1]);
if (rv) {
fprintf(stderr, "error getting MCGN bank: %s\n", zebra_err);
goto err;
}
- if (fout) fprintf(fout, " mcgn:\n");
while (1) {
if (bmcgn.links[KMCGN_MCTK-1] == 0) {
fprintf(stderr, "MCTK link is zero!\n");
@@ -6110,17 +6121,18 @@ int main(int argc, char **argv)
unpack_mcvx(b.data, &bmcvx);
- if (fout) {
- fprintf(fout, " -\n");
- fprintf(fout, " id: %" PRIu32 "\n", bmctk.idp);
- fprintf(fout, " energy: %.2f\n", bmctk.ene);
- fprintf(fout, " posx: %.2f\n", bmcvx.x);
- fprintf(fout, " posy: %.2f\n", bmcvx.y);
- fprintf(fout, " posz: %.2f\n", bmcvx.z);
- fprintf(fout, " dirx: %.4f\n", bmctk.drx);
- fprintf(fout, " diry: %.4f\n", bmctk.dry);
- fprintf(fout, " dirz: %.4f\n", bmctk.drz);
- fprintf(fout, " time: %.4f\n", bmcvx.tim);
+ if (output) {
+ hdf5_mcgn[nmcgn].evn = bmc.evn;
+ hdf5_mcgn[nmcgn].id = bmctk.idp;
+ hdf5_mcgn[nmcgn].energy = bmctk.ene;
+ hdf5_mcgn[nmcgn].x = bmcvx.x;
+ hdf5_mcgn[nmcgn].y = bmcvx.y;
+ hdf5_mcgn[nmcgn].z = bmcvx.z;
+ hdf5_mcgn[nmcgn].dirx = bmctk.drx;
+ hdf5_mcgn[nmcgn].diry = bmctk.dry;
+ hdf5_mcgn[nmcgn].dirz = bmctk.drz;
+ hdf5_mcgn[nmcgn].time = bmcvx.tim;
+ nmcgn++;
}
if (bmcgn.next) {
@@ -6156,7 +6168,6 @@ skip_mc:
}
}
- if (fout) fprintf(fout, " ev:\n");
while (1) {
unpack_ev(b.data, &bev);
ev.run = bev.run;
@@ -6185,49 +6196,44 @@ skip_mc:
if (get_event(f,&ev,&b)) goto skip_event;
- nhit = get_nhit(&ev);
+ if (output) {
+ hdf5_events[nevents].run = ev.run;
+ hdf5_events[nevents].evn = bmc.evn;
+ hdf5_events[nevents].gtr = ev.trigger_time;
+ hdf5_events[nevents].nhit = ev.nhit;
+ hdf5_events[nevents].gtid = ev.gtid;
+ hdf5_events[nevents].trg_type = ev.trigger_type;
+ hdf5_events[nevents].dc = get_dc_word(&ev, f, &bmast, &b);
- if (fout) {
- fprintf(fout, " - run: %i\n", ev.run);
- fprintf(fout, " gtr: %.0f\n", ev.trigger_time);
- fprintf(fout, " nhit: %zu\n", nhit);
- fprintf(fout, " gtid: %i\n", ev.gtid);
- fprintf(fout, " trg_type: 0x%08x\n", ev.trigger_type);
- fprintf(fout, " dc: 0x%08x\n", get_dc_word(&ev, f, &bmast, &b));
- }
-
- if (fout) {
if (get_ftpv(f,&b,&bftpv)) {
fprintf(stderr, "%s\n", zdab_err);
+ hdf5_events[nevents].ftp_x = NAN;
+ hdf5_events[nevents].ftp_y = NAN;
+ hdf5_events[nevents].ftp_z = NAN;
} else {
- fprintf(fout, " ftp:\n");
- fprintf(fout, " x: %.2f\n", bftpv.x);
- fprintf(fout, " y: %.2f\n", bftpv.y);
- fprintf(fout, " z: %.2f\n", bftpv.z);
+ hdf5_events[nevents].ftp_x = bftpv.x;
+ hdf5_events[nevents].ftp_y = bftpv.y;
+ hdf5_events[nevents].ftp_z = bftpv.z;
}
- }
- if (fout) {
if (get_ftxk(f,&b,&bftxk)) {
fprintf(stderr, "%s\n", zdab_err);
+ hdf5_events[nevents].ftk_energy = NAN;
} else {
- fprintf(fout, " ftk:\n");
- fprintf(fout, " energy: %.2f\n", bftxk.energy);
+ hdf5_events[nevents].ftk_energy = bftxk.energy;
}
- }
- if (fout) {
if (get_rsp(f,&b,&bftxr)) {
fprintf(stderr, "%s\n", zdab_err);
+ hdf5_events[nevents].rsp_energy = NAN;
} else {
- fprintf(fout, " rsp:\n");
- fprintf(fout, " energy: %.2f\n", bftxr.ene);
+ hdf5_events[nevents].rsp_energy = bftxr.ene;
}
- }
- if (nhit < min_nhit) goto skip_event;
+ nevents++;
+ }
- if (fout) fprintf(fout, " fit:\n");
+ if (ev.nhit < min_nhit) goto skip_event;
fmin_best = nll_best(&ev);
@@ -6251,25 +6257,47 @@ skip_mc:
elapsed = (tv_stop.tv_sec - tv_start.tv_sec)*1000 + (tv_stop.tv_usec - tv_start.tv_usec)/1000;
- if (fout) {
- sprintf_particle_string(id,i,tmp);
- fprintf(fout, " %s:\n", tmp);
- fprintf(fout, " posx: %.2f\n", xopt[0]);
- fprintf(fout, " posy: %.2f\n", xopt[1]);
- fprintf(fout, " posz: %.2f\n", xopt[2]);
- fprintf(fout, " t0: %.2f\n", xopt[3]);
- sprintf_yaml_list(xopt+4,i,3,tmp);
- fprintf(fout, " energy: %s\n", tmp);
- sprintf_yaml_list(xopt+5,i,3,tmp);
- fprintf(fout, " theta: %s\n", tmp);
- sprintf_yaml_list(xopt+6,i,3,tmp);
- fprintf(fout, " phi: %s\n", tmp);
- //fprintf(fout, " z1: %.2f\n", xopt[7]);
- //fprintf(fout, " z2: %.2f\n", xopt[8]);
- fprintf(fout, " fmin: %.2f\n", fmin);
- fprintf(fout, " time: %lld\n", elapsed);
- fprintf(fout, " psi: %.2f\n", fmin-fmin_best);
- fflush(fout);
+ if (output) {
+ hdf5_fits[nfits].run = ev.run;
+ hdf5_fits[nfits].gtid = ev.gtid;
+ hdf5_fits[nfits].n = i;
+ hdf5_fits[nfits].x = xopt[0];
+ hdf5_fits[nfits].y = xopt[1];
+ hdf5_fits[nfits].z = xopt[2];
+ hdf5_fits[nfits].t0 = xopt[3];
+ hdf5_fits[nfits].id1 = id[0];
+ hdf5_fits[nfits].energy1 = xopt[4];
+ hdf5_fits[nfits].theta1 = xopt[5];
+ hdf5_fits[nfits].phi1 = xopt[6];
+
+ if (i > 1) {
+ hdf5_fits[nfits].id2 = id[1];
+ hdf5_fits[nfits].energy2 = xopt[7];
+ hdf5_fits[nfits].theta2 = xopt[8];
+ hdf5_fits[nfits].phi2 = xopt[9];
+ } else {
+ hdf5_fits[nfits].id2 = 0;
+ hdf5_fits[nfits].energy2 = NAN;
+ hdf5_fits[nfits].theta2 = NAN;
+ hdf5_fits[nfits].phi2 = NAN;
+ }
+
+ if (i > 2) {
+ hdf5_fits[nfits].id3 = id[2];
+ hdf5_fits[nfits].energy3 = xopt[10];
+ hdf5_fits[nfits].theta3 = xopt[11];
+ hdf5_fits[nfits].phi3 = xopt[12];
+ } else {
+ hdf5_fits[nfits].id3 = 0;
+ hdf5_fits[nfits].energy3 = NAN;
+ hdf5_fits[nfits].theta3 = NAN;
+ hdf5_fits[nfits].phi3 = NAN;
+ }
+
+ hdf5_fits[nfits].fmin = fmin;
+ hdf5_fits[nfits].time = elapsed;
+ hdf5_fits[nfits].psi = fmin-fmin_best;
+ nfits++;
}
}
}
@@ -6292,13 +6320,13 @@ skip_event:
end:
+ if (output) save_output(output, hdf5_events, nevents, hdf5_mcgn, nmcgn, hdf5_fits, nfits);
+
free_interpolation();
pmt_response_free();
db_free(db);
- if (fout) fclose(fout);
-
zebra_close(f);
return 0;
@@ -6309,8 +6337,6 @@ err:
db_free(db);
- if (fout) fclose(fout);
-
zebra_close(f);
return 1;
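Each event appends one row to hdf5_fits per fit hypothesis, with id2/id3 set to 0 and the unused energies and angles set to NAN when fewer than three tracks were fit, so downstream code can select hypotheses with a plain filter. A hedged Python example (IDP_E_MINUS = 20, as in the utils scripts; the fits frame is read back as sketched after hdf5_utils.c below):

    # Keep only single-track electron hypotheses.
    e_fits = fits[(fits["n"] == 1) & (fits["id1"] == 20)]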
diff --git a/src/hdf5_utils.c b/src/hdf5_utils.c
new file mode 100644
index 0000000..05d1002
--- /dev/null
+++ b/src/hdf5_utils.c
@@ -0,0 +1,193 @@
+#include "hdf5_utils.h"
+#include "hdf5.h"
+#include <string.h> /* for strlen() */
+
+char *GitSHA1(void);
+char *GitDirty(void);
+
+/* Write out data to an HDF5 file.
+ *
+ * This function writes out three datasets to an HDF5 file:
+ *
+ * 1. ev: the event dataset
+ *
+ * This dataset contains event level information similar to the EV bank in
+ * SNOMAN. It also contains a data cleaning word and the FTP, FTK, and RSP
+ * fit values for the position and energy.
+ *
+ * 2. mcgn: the primary particle tracks
+ *
+ * This dataset contains vertex and track information for all the primary
+ * particle tracks used to simulate the event.
+ *
+ * 3. fits: the fit information
+ *
+ * This dataset contains the reconstructed fit information for each event.
+ * Note that this dataset will in general contain multiple rows for a
+ * single event since we fit for different particle hypotheses.
+ *
+ * Currently the ev and mcgn datasets are fixed size, and the fits dataset is
+ * chunked and extensible to allow the cat-grid-jobs script to work.
+ *
+ */
+int save_output(const char *output, HDF5Event *hdf5_events, size_t nevents, HDF5MCGN *hdf5_mcgn, size_t nmcgn, HDF5Fit *hdf5_fits, size_t nfits)
+{
+ hid_t hdf5_events_tid; /* File datatype identifier */
+ hid_t file, dataset, space; /* Handles */
+ herr_t status;
+ hsize_t dim[1]; /* Dataspace dimensions */
+ hid_t hdf5_mcgn_tid; /* File datatype identifier */
+ hid_t dataset_mcgn, space_mcgn; /* Handles */
+ hsize_t dim_mcgn[1]; /* Dataspace dimensions */
+ hid_t hdf5_fits_tid; /* File datatype identifier */
+ hid_t dataset_fits, space_fits; /* Handles */
+ hsize_t dim_fits[1]; /* Dataspace dimensions */
+ hsize_t chunk_dims[1]; /* Dataspace dimensions */
+ hid_t dcpl_id; /* Handles */
+ hsize_t maxdims[1] = {H5S_UNLIMITED};
+
+ /*
+ * Create the data space.
+ */
+ dim[0] = nevents;
+ space = H5Screate_simple(1, dim, NULL);
+ dim_mcgn[0] = nmcgn;
+ space_mcgn = H5Screate_simple(1, dim_mcgn, NULL);
+ dim_fits[0] = nfits;
+ space_fits = H5Screate_simple(1, dim_fits, maxdims);
+ chunk_dims[0] = 10;
+ dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+ H5Pset_chunk(dcpl_id, 1, chunk_dims);
+
+ /*
+ * Create the file.
+ */
+ file = H5Fcreate(output, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+ hid_t attr1, attr2; /* Attribute identifiers */
+ hid_t aid1, aid2; /* Attribute dataspace identifiers */
+ hid_t atype1, atype2; /* Attribute type */
+
+ char *git_sha1 = GitSHA1();
+ char *git_dirty = GitDirty();
+
+ /*
+ * Create string attribute.
+ */
+ aid1 = H5Screate(H5S_SCALAR);
+ atype1 = H5Tcopy(H5T_C_S1);
+ H5Tset_size(atype1, strlen(git_sha1));
+ H5Tset_strpad(atype1,H5T_STR_NULLTERM);
+ attr1 = H5Acreate2(file, "git_sha1", atype1, aid1, H5P_DEFAULT, H5P_DEFAULT);
+
+ /*
+ * Create string attribute.
+ */
+ aid2 = H5Screate(H5S_SCALAR);
+ atype2 = H5Tcopy(H5T_C_S1);
+ H5Tset_size(atype2, strlen(git_dirty));
+ H5Tset_strpad(atype2,H5T_STR_NULLTERM);
+ attr2 = H5Acreate2(file, "git_dirty", atype2, aid2, H5P_DEFAULT, H5P_DEFAULT);
+
+ herr_t ret; /* Return value */
+
+ /*
+ * Write string attribute.
+ */
+ ret = H5Awrite(attr1, atype1, git_sha1);
+ ret = H5Awrite(attr2, atype2, git_dirty);
+
+ /*
+ * Create the memory data type.
+ */
+ hdf5_events_tid = H5Tcreate(H5T_COMPOUND, sizeof(HDF5Event));
+ H5Tinsert(hdf5_events_tid, "run", HOFFSET(HDF5Event, run), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_events_tid, "evn", HOFFSET(HDF5Event, evn), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_events_tid, "gtr", HOFFSET(HDF5Event, gtr), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_events_tid, "nhit", HOFFSET(HDF5Event, nhit), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_events_tid, "gtid", HOFFSET(HDF5Event, gtid), H5T_NATIVE_UINT32);
+ H5Tinsert(hdf5_events_tid, "trg_type", HOFFSET(HDF5Event, trg_type), H5T_NATIVE_UINT32);
+ H5Tinsert(hdf5_events_tid, "dc", HOFFSET(HDF5Event, dc), H5T_NATIVE_UINT32);
+ H5Tinsert(hdf5_events_tid, "ftp_x", HOFFSET(HDF5Event, ftp_x), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_events_tid, "ftp_y", HOFFSET(HDF5Event, ftp_y), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_events_tid, "ftp_z", HOFFSET(HDF5Event, ftp_z), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_events_tid, "ftk_energy", HOFFSET(HDF5Event, ftk_energy), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_events_tid, "rsp_energy", HOFFSET(HDF5Event, rsp_energy), H5T_NATIVE_DOUBLE);
+
+ hdf5_mcgn_tid = H5Tcreate(H5T_COMPOUND, sizeof(HDF5MCGN));
+ H5Tinsert(hdf5_mcgn_tid, "run", HOFFSET(HDF5MCGN, run), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_mcgn_tid, "evn", HOFFSET(HDF5MCGN, evn), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_mcgn_tid, "id", HOFFSET(HDF5MCGN, id), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_mcgn_tid, "energy", HOFFSET(HDF5MCGN, energy), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "x", HOFFSET(HDF5MCGN, x), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "y", HOFFSET(HDF5MCGN, y), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "z", HOFFSET(HDF5MCGN, z), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "dirx", HOFFSET(HDF5MCGN, dirx), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "diry", HOFFSET(HDF5MCGN, diry), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "dirz", HOFFSET(HDF5MCGN, dirz), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_mcgn_tid, "time", HOFFSET(HDF5MCGN, time), H5T_NATIVE_DOUBLE);
+
+ hdf5_fits_tid = H5Tcreate(H5T_COMPOUND, sizeof(HDF5Fit));
+ H5Tinsert(hdf5_fits_tid, "run", HOFFSET(HDF5Fit, run), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_fits_tid, "gtid", HOFFSET(HDF5Fit, gtid), H5T_NATIVE_UINT32);
+ H5Tinsert(hdf5_fits_tid, "n", HOFFSET(HDF5Fit, n), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_fits_tid, "x", HOFFSET(HDF5Fit, x), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "y", HOFFSET(HDF5Fit, y), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "z", HOFFSET(HDF5Fit, z), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "t0", HOFFSET(HDF5Fit, t0), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "id1", HOFFSET(HDF5Fit, id1), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_fits_tid, "energy1", HOFFSET(HDF5Fit, energy1), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "theta1", HOFFSET(HDF5Fit, theta1), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "phi1", HOFFSET(HDF5Fit, phi1), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "id2", HOFFSET(HDF5Fit, id2), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_fits_tid, "energy2", HOFFSET(HDF5Fit, energy2), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "theta2", HOFFSET(HDF5Fit, theta2), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "phi2", HOFFSET(HDF5Fit, phi2), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "id3", HOFFSET(HDF5Fit, id3), H5T_NATIVE_INT);
+ H5Tinsert(hdf5_fits_tid, "energy3", HOFFSET(HDF5Fit, energy3), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "theta3", HOFFSET(HDF5Fit, theta3), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "phi3", HOFFSET(HDF5Fit, phi3), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "fmin", HOFFSET(HDF5Fit, fmin), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "time", HOFFSET(HDF5Fit, time), H5T_NATIVE_DOUBLE);
+ H5Tinsert(hdf5_fits_tid, "psi", HOFFSET(HDF5Fit, psi), H5T_NATIVE_DOUBLE);
+
+ /*
+ * Create the dataset.
+ */
+ dataset = H5Dcreate2(file, "ev", hdf5_events_tid, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ dataset_mcgn = H5Dcreate2(file, "mcgn", hdf5_mcgn_tid, space_mcgn, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ dataset_fits = H5Dcreate2(file, "fits", hdf5_fits_tid, space_fits, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
+
+ /*
+     * Write data to the dataset.
+ */
+ if (nevents)
+ status = H5Dwrite(dataset, hdf5_events_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, hdf5_events);
+ if (nmcgn)
+ status = H5Dwrite(dataset_mcgn, hdf5_mcgn_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, hdf5_mcgn);
+ if (nfits)
+ status = H5Dwrite(dataset_fits, hdf5_fits_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, hdf5_fits);
+
+ /*
+ * Release resources
+ */
+ H5Sclose(aid1);
+ H5Sclose(aid2);
+ H5Tclose(atype1);
+ H5Tclose(atype2);
+ H5Aclose(attr1);
+ H5Aclose(attr2);
+ H5Tclose(hdf5_events_tid);
+ H5Tclose(hdf5_mcgn_tid);
+ H5Tclose(hdf5_fits_tid);
+ H5Sclose(space);
+ H5Sclose(space_mcgn);
+ H5Sclose(space_fits);
+ H5Pclose(dcpl_id);
+ H5Dclose(dataset);
+ H5Dclose(dataset_mcgn);
+ H5Dclose(dataset_fits);
+ H5Fclose(file);
+
+ return 0;
+}
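For reference, a minimal sketch of reading this output back with h5py and pandas (the dataset and attribute names are the ones created by save_output() above; the filename is hypothetical):

    import h5py
    import pandas as pd

    # "fit_results.hdf5" is a hypothetical file produced with fit -o.
    with h5py.File("fit_results.hdf5", "r") as f:
        # The git SHA1 and dirty flag are stored as file-level attributes.
        print(f.attrs["git_sha1"], f.attrs["git_dirty"])
        # Each dataset is a 1D array of compound type, so h5py returns a
        # numpy structured array, which pandas can wrap directly.
        ev = pd.DataFrame(f["ev"][:])
        mcgn = pd.DataFrame(f["mcgn"][:])
        fits = pd.DataFrame(f["fits"][:])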
diff --git a/src/hdf5_utils.h b/src/hdf5_utils.h
new file mode 100644
index 0000000..4f6ad32
--- /dev/null
+++ b/src/hdf5_utils.h
@@ -0,0 +1,85 @@
+#ifndef HDF5_UTILS
+#define HDF5_UTILS
+
+#include <stdint.h>
+#include <stdlib.h> /* for size_t */
+
+/* Struct to hold information for each triggered event (similar to the EV bank,
+ * but with some reconstructed information).
+ *
+ * This is the struct that is written out to the 'ev' dataset in the HDF5 file.
+ *
+ * Note: If there is no FTP, FTK, or RSP info in the zdab file these values
+ * will be set to NaN. */
+typedef struct HDF5Event {
+ int run;
+ int evn;
+ double gtr;
+ int nhit;
+ uint32_t gtid;
+ uint32_t trg_type;
+ uint32_t dc;
+ double ftp_x;
+ double ftp_y;
+ double ftp_z;
+ double ftk_energy;
+ double rsp_energy;
+} HDF5Event;
+
+/* Struct to hold information for each primary seed track that is written out
+ * as the `mcgn` dataset in the HDF5 file. To figure out which track
+ * corresponds to a given GTID you can match them up based on the evn column,
+ * which is filled with the MC event number in both datasets. */
+typedef struct HDF5MCGN {
+ int run;
+ int evn;
+ int id;
+ double energy;
+ double x;
+ double y;
+ double z;
+ double dirx;
+ double diry;
+ double dirz;
+ double time;
+} HDF5MCGN;
+
+/* Struct to hold information for each fit that is written out
+ * as the `fits` dataset in the HDF5 file.
+ *
+ * Note that the energy2, theta2, phi2 fields will only be filled in for fits
+ * with 2 tracks and similarly for energy3, theta3, and phi3 for 3 track fits. */
+typedef struct HDF5Fit {
+ int run;
+ uint32_t gtid;
+ double x;
+ double y;
+ double z;
+ double t0;
+
+ /* Number of tracks. */
+ int n;
+
+ int id1;
+ double energy1;
+ double theta1;
+ double phi1;
+
+ int id2;
+ double energy2;
+ double theta2;
+ double phi2;
+
+ int id3;
+ double energy3;
+ double theta3;
+ double phi3;
+
+ double fmin;
+ double time;
+ double psi;
+} HDF5Fit;
+
+int save_output(const char *output, HDF5Event *hdf5_events, size_t nevents, HDF5MCGN *hdf5_mcgn, size_t nmcgn, HDF5Fit *hdf5_fits, size_t nfits);
+
+#endif
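As the comments above note, tracks in mcgn are matched to events through the evn column, and fits to events through the (run, gtid) columns. A minimal pandas sketch of those joins, using the column names defined in the structs above and frames read as in the earlier sketch:

    import pandas as pd

    # Prefix the track and fit columns so they survive the merge unambiguously.
    tracks = ev.merge(mcgn.add_prefix("mcgn_"),
                      left_on="evn", right_on="mcgn_evn")
    results = ev.merge(fits.add_prefix("fit_"),
                       left_on=["run", "gtid"],
                       right_on=["fit_run", "fit_gtid"])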
diff --git a/src/zdab-cat.c b/src/zdab-cat.c
index 5b9bb3d..278236c 100644
--- a/src/zdab-cat.c
+++ b/src/zdab-cat.c
@@ -26,6 +26,15 @@
#include <errno.h> /* for errno */
#include "release.h"
#include "dc.h"
+#include "hdf5.h"
+#include <math.h>
+#include "hdf5_utils.h"
+
+/* Maximum number of events, primary seed particle tracks, and fits.
+ *
+ * FIXME: I should write out the events, tracks, and fits as they come up
+ * instead of storing them until the end. */
+#define MAX_NEVENTS 1000000
#define EV_RECORD 0x45562020 // 'EV '
#define MCTK_RECORD 0x4d43544b // 'MCTK'
@@ -47,22 +56,25 @@ int main(int argc, char **argv)
{
int i;
zebraFile *f;
- zebraBank bmast, bmc, bmcgn, mctk, b;
+ zebraBank bmast, mc, bmcgn, mctk, b;
int rv;
EVBank bev;
FTPVBank bftpv;
FTXKBank bftxk;
RSPBank bftxr;
+ MCBank bmc;
MCTKBank bmctk;
MCVXBank bmcvx;
event ev = {0};
char *filename = NULL;
char *output = NULL;
- FILE *fout = stdout;
int skip_second_event = 0;
- size_t nhit;
int last_run;
char dqxx_file[256];
+ int nevents = 0;
+ static HDF5Event hdf5_events[MAX_NEVENTS];
+ int nmcgn = 0;
+ static HDF5MCGN hdf5_mcgn[MAX_NEVENTS];
for (i = 1; i < argc; i++) {
if (strlen(argv[i]) >= 2 && !strncmp(argv[i], "--", 2)) {
@@ -96,23 +108,8 @@ int main(int argc, char **argv)
return 1;
}
- if (output) {
- fout = fopen(output, "w");
-
- if (!fout) {
- fprintf(stderr, "failed to open '%s': %s\n", output, strerror(errno));
- return 1;
- }
- }
-
- if (fout) {
- fprintf(fout, "git_sha1: %s\n", GitSHA1());
- fprintf(fout, "git_dirty: %s\n", GitDirty());
- }
-
if (load_pmt_info()) {
zebra_close(f);
- if (output) fclose(fout);
return 1;
}
@@ -160,11 +157,9 @@ int main(int argc, char **argv)
continue;
}
- if (fout) fprintf(fout, "---\n");
-
if (bmast.links[KMAST_MC-1] == 0) goto skip_mc;
- rv = zebra_get_bank(f,&bmc,bmast.links[KMAST_MC-1]);
+ rv = zebra_get_bank(f,&mc,bmast.links[KMAST_MC-1]);
if (rv) {
fprintf(stderr, "error getting MC bank: %s\n", zebra_err);
@@ -176,14 +171,15 @@ int main(int argc, char **argv)
goto err;
}
- rv = zebra_get_bank(f,&bmcgn,bmc.links[KMC_MCGN-1]);
+ unpack_mc(mc.data, &bmc);
+
+ rv = zebra_get_bank(f,&bmcgn,mc.links[KMC_MCGN-1]);
if (rv) {
fprintf(stderr, "error getting MCGN bank: %s\n", zebra_err);
goto err;
}
- if (fout) fprintf(fout, " mcgn:\n");
while (1) {
if (bmcgn.links[KMCGN_MCTK-1] == 0) {
fprintf(stderr, "MCTK link is zero!\n");
@@ -232,17 +228,18 @@ int main(int argc, char **argv)
unpack_mcvx(b.data, &bmcvx);
- if (fout) {
- fprintf(fout, " -\n");
- fprintf(fout, " id: %" PRIu32 "\n", bmctk.idp);
- fprintf(fout, " energy: %.2f\n", bmctk.ene);
- fprintf(fout, " posx: %.2f\n", bmcvx.x);
- fprintf(fout, " posy: %.2f\n", bmcvx.y);
- fprintf(fout, " posz: %.2f\n", bmcvx.z);
- fprintf(fout, " dirx: %.4f\n", bmctk.drx);
- fprintf(fout, " diry: %.4f\n", bmctk.dry);
- fprintf(fout, " dirz: %.4f\n", bmctk.drz);
- fprintf(fout, " time: %.4f\n", bmcvx.tim);
+ if (output) {
+ hdf5_mcgn[nmcgn].evn = bmc.evn;
+ hdf5_mcgn[nmcgn].id = bmctk.idp;
+ hdf5_mcgn[nmcgn].energy = bmctk.ene;
+ hdf5_mcgn[nmcgn].x = bmcvx.x;
+ hdf5_mcgn[nmcgn].y = bmcvx.y;
+ hdf5_mcgn[nmcgn].z = bmcvx.z;
+ hdf5_mcgn[nmcgn].dirx = bmctk.drx;
+ hdf5_mcgn[nmcgn].diry = bmctk.dry;
+ hdf5_mcgn[nmcgn].dirz = bmctk.drz;
+ hdf5_mcgn[nmcgn].time = bmcvx.tim;
+ nmcgn++;
}
if (bmcgn.next) {
@@ -278,7 +275,6 @@ skip_mc:
}
}
- if (fout) fprintf(fout, " ev:\n");
while (1) {
unpack_ev(b.data, &bev);
ev.run = bev.run;
@@ -305,44 +301,41 @@ skip_mc:
rv = get_event(f,&ev,&b);
- nhit = get_nhit(&ev);
+ if (output) {
+ hdf5_events[nevents].run = ev.run;
+ hdf5_events[nevents].evn = bmc.evn;
+ hdf5_events[nevents].gtr = ev.trigger_time;
+ hdf5_events[nevents].nhit = ev.nhit;
+ hdf5_events[nevents].gtid = ev.gtid;
+ hdf5_events[nevents].trg_type = ev.trigger_type;
+ hdf5_events[nevents].dc = get_dc_word(&ev, f, &bmast, &b);
- if (fout) {
- fprintf(fout, " - run: %i\n", ev.run);
- fprintf(fout, " gtr: %.0f\n", ev.trigger_time);
- fprintf(fout, " nhit: %zu\n", nhit);
- fprintf(fout, " gtid: %i\n", ev.gtid);
- fprintf(fout, " trg_type: 0x%08x\n", ev.trigger_type);
- fprintf(fout, " dc: 0x%08x\n", get_dc_word(&ev, f, &bmast, &b));
- }
-
- if (fout) {
if (get_ftpv(f,&b,&bftpv)) {
fprintf(stderr, "%s\n", zdab_err);
+ hdf5_events[nevents].ftp_x = NAN;
+ hdf5_events[nevents].ftp_y = NAN;
+ hdf5_events[nevents].ftp_z = NAN;
} else {
- fprintf(fout, " ftp:\n");
- fprintf(fout, " x: %.2f\n", bftpv.x);
- fprintf(fout, " y: %.2f\n", bftpv.y);
- fprintf(fout, " z: %.2f\n", bftpv.z);
+ hdf5_events[nevents].ftp_x = bftpv.x;
+ hdf5_events[nevents].ftp_y = bftpv.y;
+ hdf5_events[nevents].ftp_z = bftpv.z;
}
- }
- if (fout) {
if (get_ftxk(f,&b,&bftxk)) {
fprintf(stderr, "%s\n", zdab_err);
+ hdf5_events[nevents].ftk_energy = NAN;
} else {
- fprintf(fout, " ftk:\n");
- fprintf(fout, " energy: %.2f\n", bftxk.energy);
+ hdf5_events[nevents].ftk_energy = bftxk.energy;
}
- }
- if (fout) {
if (get_rsp(f,&b,&bftxr)) {
fprintf(stderr, "%s\n", zdab_err);
+ hdf5_events[nevents].rsp_energy = NAN;
} else {
- fprintf(fout, " rsp:\n");
- fprintf(fout, " energy: %.2f\n", bftxr.ene);
+ hdf5_events[nevents].rsp_energy = bftxr.ene;
}
+
+ nevents++;
}
/* Note the origin link for the first EV bank points back to the
@@ -359,9 +352,11 @@ skip_mc:
}
}
- db_free(db);
+ if (output) {
+ save_output(output, hdf5_events, nevents, hdf5_mcgn, nmcgn, NULL, 0);
+ }
- if (fout) fclose(fout);
+ db_free(db);
zebra_close(f);
@@ -370,8 +365,6 @@ skip_mc:
err:
db_free(db);
- if (fout) fclose(fout);
-
zebra_close(f);
return 1;
diff --git a/src/zdab_utils.c b/src/zdab_utils.c
index 716503c..416bd79 100644
--- a/src/zdab_utils.c
+++ b/src/zdab_utils.c
@@ -580,6 +580,24 @@ void unpack_ftk(uint32_t *data, FTKBank *b)
unpack((uint8_t *) (data+25), "f",&b->spare5);
}
+void unpack_mc(uint32_t *data, MCBank *b)
+{
+ unpack((uint8_t *) data, "l",&b->jdy);
+ unpack((uint8_t *) (data+1), "l",&b->ut1);
+ unpack((uint8_t *) (data+2), "l",&b->ut2);
+ unpack((uint8_t *) (data+3), "l",&b->dte);
+ unpack((uint8_t *) (data+4), "l",&b->hmsc);
+ unpack((uint8_t *) (data+5), "l",&b->seed1);
+ unpack((uint8_t *) (data+6), "l",&b->seed2);
+ unpack((uint8_t *) (data+7), "l",&b->seed_num);
+ unpack((uint8_t *) (data+8), "l",&b->dtp);
+ unpack((uint8_t *) (data+9), "f",&b->mcver);
+ unpack((uint8_t *) (data+10), "l",&b->evn);
+ unpack((uint8_t *) (data+11), "F",&b->gtr);
+ unpack((uint8_t *) (data+13), "l",&b->num_ge_err);
+ unpack((uint8_t *) (data+14), "f",&b->pmt_eff);
+}
+
void unpack_mcgn(uint32_t *data, MCGNBank *b)
{
unpack((uint8_t *) data, "l",&b->id);
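Judging from the word offsets in unpack_mc() above ("l" and "f" advance one 32-bit word, while the "F" used for gtr advances two), the 15-word MC bank can be sketched in Python as follows. This is an illustration only; the big-endian byte order is an assumption based on the -DSWAP_BYTES build flag, and this is not a drop-in replacement for unpack().

    import struct

    def read_mc(words: bytes):
        """Hypothetical reader for the 15-word (60-byte) MC bank."""
        # Words 0-8: nine 32-bit unsigned integers ("l").
        jdy, ut1, ut2, dte, hmsc, seed1, seed2, seed_num, dtp = \
            struct.unpack(">9I", words[0:36])
        (mcver,) = struct.unpack(">f", words[36:40])        # word 9  ("f")
        (evn,) = struct.unpack(">I", words[40:44])          # word 10 ("l")
        (gtr,) = struct.unpack(">d", words[44:52])          # words 11-12 ("F")
        (num_ge_err,) = struct.unpack(">I", words[52:56])   # word 13 ("l")
        (pmt_eff,) = struct.unpack(">f", words[56:60])      # word 14 ("f")
        return dict(jdy=jdy, ut1=ut1, ut2=ut2, dte=dte, hmsc=hmsc,
                    seed1=seed1, seed2=seed2, seed_num=seed_num, dtp=dtp,
                    mcver=mcver, evn=evn, gtr=gtr,
                    num_ge_err=num_ge_err, pmt_eff=pmt_eff)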
diff --git a/src/zdab_utils.h b/src/zdab_utils.h
index d4411d9..043e185 100644
--- a/src/zdab_utils.h
+++ b/src/zdab_utils.h
@@ -378,6 +378,37 @@ typedef struct FTKBank {
float spare5;
} FTKBank;
+typedef struct MCBank {
+ /* Julian date. */
+ uint32_t jdy;
+ /* Universal time seconds. */
+ uint32_t ut1;
+ /* Universal time nanoseconds. */
+ uint32_t ut2;
+ /* Date (format: yyyymmdd). */
+ uint32_t dte;
+ /* Time (format: hhmmsscc - cc is centisec). */
+ uint32_t hmsc;
+ /* First random number seed. */
+ uint32_t seed1;
+ /* Second random number seed. */
+ uint32_t seed2;
+    /* Random number seed number. */
+ uint32_t seed_num;
+ /* Data type. */
+ uint32_t dtp;
+ /* SNOMAN MC version number. */
+ float mcver;
+ /* Monte Carlo event number. */
+ uint32_t evn;
+    /* Generation time in nsec. */
+ double gtr;
+ /* Number of suppressed Cerenkov photon tracking errors. */
+ uint32_t num_ge_err;
+ /* The current PMT collection efficiency. */
+ float pmt_eff;
+} MCBank;
+
typedef struct MCGNBank {
/* Particle id. */
uint32_t id;
@@ -638,6 +669,7 @@ void unpack_ftpt(uint32_t *data, FTPTBank *b);
void unpack_ftpv(uint32_t *data, FTPVBank *b);
void unpack_ftxk(uint32_t *data, FTXKBank *b);
void unpack_ftk(uint32_t *data, FTKBank *b);
+void unpack_mc(uint32_t *data, MCBank *b);
void unpack_mcgn(uint32_t *data, MCGNBank *b);
void unpack_mcvx(uint32_t *data, MCVXBank *b);
void unpack_mctk(uint32_t *data, MCTKBank *b);
diff --git a/utils/Makefile b/utils/Makefile
index ef21a09..011a723 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -13,5 +13,6 @@ install:
$(INSTALL) plot-likelihood $(INSTALL_BIN)
$(INSTALL) submit-grid-jobs $(INSTALL_BIN)
$(INSTALL) gen-dark-matter $(INSTALL_BIN)
+ $(INSTALL) fit-wrapper $(INSTALL_BIN)
.PHONY: install
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index ba06290..83a8004 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -23,7 +23,7 @@ as YAML to stdout.
Example:
- $ cat-grid-jobs --dir ~/sddm/src/ ~/mc_atm_nu_no_osc_genie_010000_0.mcds ~/grid_job_results/*.txt > output.txt
+    $ cat-grid-jobs ~/mc_atm_nu_no_osc_genie_010000_0.mcds ~/grid_job_results/*.hdf5 -o output.hdf5
"""
@@ -33,80 +33,145 @@ try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
+import os
+import sys
+
+# Check that a given file can be accessed with the correct mode.
+# Additionally check that `fn` is not a directory, as on Windows
+# directories pass the os.access check.
+def _access_check(fn, mode):
+ return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn))
+
+def which(cmd, mode=os.F_OK | os.X_OK, path=None):
+ """Given a command, mode, and a PATH string, return the path which
+ conforms to the given mode on the PATH, or None if there is no such
+ file.
+ `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
+ of os.environ.get("PATH"), or can be overridden with a custom search
+ path.
+ """
+ # If we're given a path with a directory part, look it up directly rather
+ # than referring to PATH directories. This includes checking relative to the
+ # current directory, e.g. ./script
+ if os.path.dirname(cmd):
+ if _access_check(cmd, mode):
+ return cmd
+ return None
+
+ if path is None:
+ path = os.environ.get("PATH", None)
+ if path is None:
+ try:
+ path = os.confstr("CS_PATH")
+ except (AttributeError, ValueError):
+ # os.confstr() or CS_PATH is not available
+ path = os.defpath
+ # bpo-35755: Don't use os.defpath if the PATH environment variable is
+ # set to an empty string
+
+ # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+ if not path:
+ return None
+
+ path = path.split(os.pathsep)
+
+ if sys.platform == "win32":
+ # The current directory takes precedence on Windows.
+ curdir = os.curdir
+ if curdir not in path:
+ path.insert(0, curdir)
+
+ # PATHEXT is necessary to check on Windows.
+ pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
+ # See if the given file matches any of the expected path extensions.
+ # This will allow us to short circuit when given "python.exe".
+ # If it does match, only test that one, otherwise we have to try
+ # others.
+ if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
+ files = [cmd]
+ else:
+ files = [cmd + ext for ext in pathext]
+ else:
+ # On other platforms you don't have things like PATHEXT to tell you
+ # what file suffixes are executable, so just pass on cmd as-is.
+ files = [cmd]
+
+ seen = set()
+ for dir in path:
+ normdir = os.path.normcase(dir)
+ if not normdir in seen:
+ seen.add(normdir)
+ for thefile in files:
+ name = os.path.join(dir, thefile)
+ if _access_check(name, mode):
+ return name
+ return None
+
+# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python
+class bcolors:
+ HEADER = '\033[95m'
+ OKBLUE = '\033[94m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+ UNDERLINE = '\033[4m'
+
+def print_warning(msg):
+ print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr)
if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
import numpy as np
- import subprocess
+ from subprocess import check_call
from os.path import join
import os
import sys
+ import h5py
parser = argparse.ArgumentParser("concatenate fit results from grid jobs into a single file")
- parser.add_argument("--dir", type=str, help="fitter directory", required=True)
parser.add_argument("zdab", help="zdab input file")
parser.add_argument("filenames", nargs='+', help="input files")
+ parser.add_argument("-o", "--output", type=str, help="output filename", required=True)
args = parser.parse_args()
- fit_results = {}
+ zdab_cat = which("zdab-cat")
- # First we create a dictionary mapping (run, gtid) -> fit results.
- for filename in args.filenames:
- with open(filename) as f:
- data = yaml.load(f,Loader=Loader)
+ if zdab_cat is None:
+ print("couldn't find zdab-cat in path!",file=sys.stderr)
+ sys.exit(1)
- if data is None:
- continue
-
- for event in data['data']:
- if event['ev'] is None:
- continue
-
- # if the ev branch is filled in, it means the event was fit
- for ev in event['ev']:
- # add the git SHA1 hash to the fit results since technically it
- # could be different than the version in zdab-cat
- ev['fit']['git_sha1'] = data['git_sha1']
- ev['fit']['git_dirty'] = data['git_dirty']
- fit_results[(ev['run'],ev['gtid'])] = ev['fit']
-
- # Next we get the full event list along with the data cleaning word, FTP
+ # First we get the full event list along with the data cleaning word, FTP
# position, FTK, and RSP energy from the original zdab and then add the fit
# results.
#
# Note: We send stderr to /dev/null since there can be a lot of warnings
# about PMT types and fit results
with open(os.devnull, 'w') as f:
- popen = subprocess.Popen([join(args.dir,"zdab-cat"),args.zdab],stdout=subprocess.PIPE,stderr=f)
+ check_call([zdab_cat,args.zdab,"-o",args.output],stderr=f)
total_events = 0
events_with_fit = 0
-
- doc = {'data': []}
-
- for data in yaml.load_all(popen.stdout,Loader=Loader):
- if 'ev' not in data:
- doc.update(data)
- continue
-
- for ev in data['ev']:
- run = ev['run']
- gtid = ev['gtid']
-
- if (run,gtid) in fit_results:
- ev['fit'] = fit_results[(run,gtid)]
- events_with_fit += 1
-
- total_events += 1
-
- doc['data'].append(data)
-
- popen.wait()
+ total_fits = 0
+
+ with h5py.File(args.output,"a") as fout:
+ total_events = fout['ev'].shape[0]
+ for filename in args.filenames:
+ with h5py.File(filename) as f:
+                # Check to see if the git sha1 hashes match
+ if fout.attrs['git_sha1'] != f.attrs['git_sha1']:
+ print_warning("git_sha1 is %s for current version but %s for %s" % (fout.attrs['git_sha1'],f.attrs['git_sha1'],filename))
+ # get fits which match up with the events
+ valid_fits = f['fits'][np.isin(f['fits'][:][['run','gtid']],fout['ev'][:][['run','gtid']])]
+ # Add the fit results
+ fout['fits'].resize((fout['fits'].shape[0]+valid_fits.shape[0],))
+ fout['fits'][-valid_fits.shape[0]:] = valid_fits
+ events_with_fit += len(np.unique(valid_fits[['run','gtid']]))
+ total_fits += len(np.unique(f['fits']['run','gtid']))
# Print out number of fit results that were added. Hopefully, this will
# make it easy to catch an error if, for example, this gets run with a
# mismatching zdab and fit results
- print("added %i/%i fit results to a total of %i events" % (events_with_fit, len(fit_results), total_events),file=sys.stderr)
-
- print(yaml.dump(doc,default_flow_style=False,Dumper=Dumper))
+ print("added %i/%i fit results to a total of %i events" % (events_with_fit, total_fits, total_events),file=sys.stderr)
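The resize-and-append above only works because save_output() creates the fits dataset chunked with an unlimited maxshape (H5S_UNLIMITED); a fixed-size dataset cannot be extended. The same pattern in isolation, as a sketch with a hypothetical two-field dtype:

    import h5py
    import numpy as np

    # Hypothetical subset of the fit dtype.
    dt = np.dtype([("run", "i4"), ("gtid", "u4")])

    with h5py.File("out.hdf5", "w") as f:
        # maxshape=(None,) marks the dataset as extensible; chunking is required.
        fits = f.create_dataset("fits", shape=(0,), maxshape=(None,),
                                chunks=(10,), dtype=dt)
        new = np.array([(10000, 1), (10000, 2)], dtype=dt)
        fits.resize((fits.shape[0] + len(new),))
        fits[-len(new):] = new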
diff --git a/utils/fit-wrapper b/utils/fit-wrapper
new file mode 100644
index 0000000..940b807
--- /dev/null
+++ b/utils/fit-wrapper
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# Simple wrapper script to run jobs on the grid
+
+module load hdf5
+module load zlib
+./fit "$@"
diff --git a/utils/plot b/utils/plot
index ac93eda..1bac500 100755
--- a/utils/plot
+++ b/utils/plot
@@ -74,6 +74,8 @@ if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
import numpy as np
+ import h5py
+ import pandas as pd
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
@@ -81,121 +83,113 @@ if __name__ == '__main__':
for filename in args.filenames:
print(filename)
- with open(filename) as f:
- data = yaml.load(f.read(),Loader=Loader)
-
- dx = []
- dy = []
- dz = []
- dT = []
- thetas = []
- likelihood_ratio = []
- t_electron = []
- t_muon = []
- psi = []
- for event in data['data']:
- # get the particle ID
- id = event['mcgn'][0]['id']
-
- if 'ev' not in event:
- continue
-
- if 'fit' not in event['ev'][0]:
- # if nhit < 100 we don't fit the event
- continue
-
- if id not in event['ev'][0]['fit']:
- continue
-
- fit = event['ev'][0]['fit']
-
- mass = SNOMAN_MASS[id]
- # for some reason it's the *second* track which seems to contain the
- # initial energy
- true_energy = event['mcgn'][0]['energy']
- # The MCTK bank has the particle's total energy (except for neutrons)
- # so we need to convert it into kinetic energy
- ke = true_energy - mass
- energy = fit[id]['energy']
- dT.append(energy-ke)
- true_posx = event['mcgn'][0]['posx']
- posx = fit[id]['posx']
- dx.append(posx-true_posx)
- true_posy = event['mcgn'][0]['posy']
- posy = fit[id]['posy']
- dy.append(posy-true_posy)
- true_posz = event['mcgn'][0]['posz']
- posz = fit[id]['posz']
- dz.append(posz-true_posz)
- dirx = event['mcgn'][0]['dirx']
- diry = event['mcgn'][0]['diry']
- dirz = event['mcgn'][0]['dirz']
- true_dir = [dirx,diry,dirz]
- true_dir = np.array(true_dir)/np.linalg.norm(true_dir)
- theta = fit[id]['theta']
- phi = fit[id]['phi']
- dir = [np.sin(theta)*np.cos(phi),np.sin(theta)*np.sin(phi),np.cos(theta)]
- dir = np.array(dir)/np.linalg.norm(dir)
- thetas.append(np.degrees(np.arccos(np.dot(true_dir,dir))))
- if IDP_E_MINUS in fit and IDP_MU_MINUS in fit:
- fmin_electron = fit[IDP_E_MINUS]['fmin']
- fmin_muon = fit[IDP_MU_MINUS]['fmin']
- likelihood_ratio.append(fmin_muon-fmin_electron)
- if IDP_E_MINUS in fit:
- t_electron.append(fit[IDP_E_MINUS]['time'])
- if IDP_MU_MINUS in fit:
- t_muon.append(fit[IDP_MU_MINUS]['time'])
-
- if 'nhit' in event['ev'][0]:
- nhit = event['ev'][0]['nhit']
- psi.append(fit[id]['psi']/nhit)
-
- if len(t_electron) < 2 or len(t_muon) < 2:
+
+ with h5py.File(filename) as f:
+ ev = pd.read_hdf(filename, "ev")
+ mcgn = pd.read_hdf(filename, "mcgn")
+ fits = pd.read_hdf(filename, "fits")
+
+ # get rid of 2nd events like Michel electrons
+ ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).first()
+
+ # Now, we merge all three datasets together to produce a single
+ # dataframe. To do so, we join the ev dataframe with the mcgn frame
+ # on the evn column, and then join with the fits on the run and
+ # gtid columns.
+ #
+ # At the end we will have a single dataframe with one row for each
+ # fit, i.e. it will look like:
+ #
+ # >>> data
+ # run gtid nhit, ... mcgn_x, mcgn_y, mcgn_z, ..., fit_id1, fit_x, fit_y, fit_z, ...
+ #
+ # Before merging, we prefix the primary seed track table with mcgn_
+ # and the fit table with fit_ just to make things easier.
+
+ # Prefix track and fit frames
+ mcgn = mcgn.add_prefix("mcgn_")
+ fits = fits.add_prefix("fit_")
+
+ # merge ev and mcgn on evn
+ data = ev.merge(mcgn,left_on=['evn'],right_on=['mcgn_evn'])
+ # merge data and fits on run and gtid
+ data = data.merge(fits,left_on=['run','gtid'],right_on=['fit_run','fit_gtid'])
+
+ # calculate true kinetic energy
+ mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values]
+ data['T'] = data['mcgn_energy'].values - mass
+ data['dx'] = data['fit_x'].values - data['mcgn_x'].values
+ data['dy'] = data['fit_y'].values - data['mcgn_y'].values
+ data['dz'] = data['fit_z'].values - data['mcgn_z'].values
+ data['dT'] = data['fit_energy1'].values - data['T'].values
+
+ true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze()
+ dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']),
+ np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']),
+ np.cos(data['fit_theta1']))).squeeze()
+
+ data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1)))
+
+            # only keep events which have at least two fits
+ data = data.groupby(['run','gtid']).filter(lambda x: len(x) > 1)
+ data_true = data[data['fit_id1'] == data['mcgn_id']]
+ data_e = data[data['fit_id1'] == IDP_E_MINUS]
+ data_mu = data[data['fit_id1'] == IDP_MU_MINUS]
+
+ data_true = data_true.set_index(['run','gtid'])
+ data_e = data_e.set_index(['run','gtid'])
+ data_mu = data_mu.set_index(['run','gtid'])
+
+ data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin']
+ data_true['te'] = data_e['fit_time']
+ data_true['tm'] = data_mu['fit_time']
+ data_true['Te'] = data_e['fit_energy1']
+
+ if len(data_true) < 2:
continue
- mean, mean_error, std, std_error = get_stats(dT)
+ mean, mean_error, std, std_error = get_stats(data_true.dT)
print("dT = %.2g +/- %.2g" % (mean, mean_error))
print("std(dT) = %.2g +/- %.2g" % (std, std_error))
- mean, mean_error, std, std_error = get_stats(dx)
+ mean, mean_error, std, std_error = get_stats(data_true.dx)
print("dx = %4.2g +/- %.2g" % (mean, mean_error))
print("std(dx) = %4.2g +/- %.2g" % (std, std_error))
- mean, mean_error, std, std_error = get_stats(dy)
+ mean, mean_error, std, std_error = get_stats(data_true.dy)
print("dy = %4.2g +/- %.2g" % (mean, mean_error))
print("std(dy) = %4.2g +/- %.2g" % (std, std_error))
- mean, mean_error, std, std_error = get_stats(dz)
+ mean, mean_error, std, std_error = get_stats(data_true.dz)
print("dz = %4.2g +/- %.2g" % (mean, mean_error))
print("std(dz) = %4.2g +/- %.2g" % (std, std_error))
- mean, mean_error, std, std_error = get_stats(thetas)
+ mean, mean_error, std, std_error = get_stats(data_true.theta)
print("std(theta) = %4.2g +/- %.2g" % (std, std_error))
plt.figure(1)
- plot_hist(dT, label=filename)
+ plot_hist(data_true.dT, label=filename)
plt.xlabel("Kinetic Energy difference (MeV)")
plt.figure(2)
- plot_hist(dx, label=filename)
+ plot_hist(data_true.dx, label=filename)
plt.xlabel("X Position difference (cm)")
plt.figure(3)
- plot_hist(dy, label=filename)
+ plot_hist(data_true.dy, label=filename)
plt.xlabel("Y Position difference (cm)")
plt.figure(4)
- plot_hist(dz, label=filename)
+ plot_hist(data_true.dz, label=filename)
plt.xlabel("Z Position difference (cm)")
plt.figure(5)
- plot_hist(thetas, label=filename)
+ plot_hist(data_true.theta, label=filename)
plt.xlabel(r"$\theta$ (deg)")
plt.figure(6)
- plot_hist(likelihood_ratio, label=filename)
+ plot_hist(data_true.ratio, label=filename)
plt.xlabel(r"Log Likelihood Ratio ($e/\mu$)")
plt.figure(7)
- plot_hist(np.array(t_electron)/1e3/60.0, label=filename)
+ plot_hist(data_true.te/1e3/60.0, label=filename)
plt.xlabel(r"Electron Fit time (minutes)")
plt.figure(8)
- plot_hist(np.array(t_muon)/1e3/60.0, label=filename)
+ plot_hist(data_true.tm/1e3/60.0, label=filename)
plt.xlabel(r"Muon Fit time (minutes)")
- if len(psi):
- plt.figure(9)
- plot_hist(psi, label=filename)
- plt.xlabel(r"$\Psi$/Nhit")
+ plt.figure(9)
+ plot_hist(data_true.fit_psi/data_true.nhit, label=filename)
+ plt.xlabel(r"$\Psi$/Nhit")
plot_legend(1)
plot_legend(2)
@@ -205,6 +199,5 @@ if __name__ == '__main__':
plot_legend(6)
plot_legend(7)
plot_legend(8)
- if len(psi):
- plot_legend(9)
+ plot_legend(9)
plt.show()
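The likelihood-ratio column above relies on pandas index alignment: once data_e and data_mu are indexed by (run, gtid), arithmetic between them matches rows by index rather than by position. A toy illustration:

    import pandas as pd

    e = pd.DataFrame({"run": [1, 1], "gtid": [10, 11], "fmin": [5.0, 7.0]})
    mu = pd.DataFrame({"run": [1, 1], "gtid": [11, 10], "fmin": [9.0, 6.0]})
    e = e.set_index(["run", "gtid"])
    mu = mu.set_index(["run", "gtid"])

    # Rows are matched on (run, gtid), not on their order in each frame:
    # (1, 10) -> 6.0 - 5.0 = 1.0 and (1, 11) -> 9.0 - 7.0 = 2.0.
    ratio = mu["fmin"] - e["fmin"]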
diff --git a/utils/plot-energy b/utils/plot-energy
index a0274b7..4a8521b 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -107,140 +107,69 @@ if __name__ == '__main__':
import numpy as np
import pandas as pd
import sys
+ import h5py
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
args = parser.parse_args()
- events = []
- fit_results = []
-
- for filename in args.filenames:
- print(filename)
- with open(filename) as f:
- data = yaml.load(f,Loader=Loader)
-
- for i, event in enumerate(data['data']):
- for ev in event['ev']:
- events.append((
- ev['run'],
- ev['gtr'],
- ev['nhit'],
- ev['gtid'],
- ev['dc'],
- ev['ftp']['x'] if 'ftp' in ev else np.nan,
- ev['ftp']['y'] if 'ftp' in ev else np.nan,
- ev['ftp']['z'] if 'ftp' in ev else np.nan,
- ev['rsp']['energy'] if 'rsp' in ev and ev['rsp']['energy'] > 0 else np.nan,
- ))
-
- if 'fit' not in ev:
- continue
-
- for id, fit_result in [x for x in ev['fit'].iteritems() if isinstance(x[0],int)]:
- # FIXME: Should I just store the particle ids in the YAML
- # output as a list of particle ids instead of a single
- # integer?
- ids = map(int,chunks(str(id),2))
- energy = 0.0
- skip = False
- for i, ke in zip(ids,np.atleast_1d(fit_result['energy'])):
- energy += ke + SNOMAN_MASS[i]
-
- # This is a bit of a hack. It appears that many times
- # the fit will actually do much better by including a
- # very low energy electron or muon. I believe the
- # reason for this is that of course my likelihood
- # function is not perfect (for example, I don't include
- # the correct angular distribution for Rayleigh
- # scattered light), and so the fitter often wants to
- # add a very low energy electron or muon to fix things.
- #
- # Ideally I would fix the likelihood function, but for
- # now we just discard any fit results which have a very
- # low energy electron or muon.
- if len(ids) > 1 and i == 20 and ke < 20.0:
- skip = True
-
- if len(ids) > 1 and i == 22 and ke < 200.0:
- skip = True
-
- if skip:
- continue
-
- # Calculate the approximate Ockham factor.
- # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
- #
- # Note: This is a really approximate form by assuming that
- # the shape of the likelihood space is equal to the average
- # uncertainty in the different parameters.
- w = len(ids)*np.log(0.1*0.001) + np.sum(np.log(fit_result['energy'])) + len(ids)*np.log(1e-4/(4*np.pi))
-
- fit_results.append((
- ev['run'],
- ev['gtid'],
- id,
- fit_result['posx'],
- fit_result['posy'],
- fit_result['posz'],
- fit_result['t0'],
- energy,
- np.atleast_1d(fit_result['theta'])[0],
- np.atleast_1d(fit_result['phi'])[0],
- fit_result['fmin'] - w,
- fit_result['psi']/ev['nhit']))
-
- # create a dataframe
- # note: we have to first create a numpy structured array since there is no
- # way to pass a list of data types to the DataFrame constructor. See
- # https://github.com/pandas-dev/pandas/issues/4464
- array = np.array(fit_results,
- dtype=[('run',np.int), # run number
- ('gtid',np.int), # gtid
- ('id',np.int), # particle id
- ('x', np.double), # x
- ('y',np.double), # y
- ('z',np.double), # z
- ('t0',np.double), # t0
- ('ke',np.double), # kinetic energy
- ('theta',np.double), # direction of 1st particle
- ('phi',np.double), # direction of 2nd particle
- ('fmin',np.double), # negative log likelihood
- ('psi',np.double)] # goodness of fit parameter
- )
- df = pd.DataFrame.from_records(array)
-
- array = np.array(events,
- dtype=[('run',np.int), # run number
- ('gtr',np.double), # 50 MHz clock in ns
- ('nhit',np.int), # number of PMTs hit
- ('gtid',np.int), # gtid
- ('dc',np.int), # data cleaning word
- ('ftpx',np.double), # FTP fitter X position
- ('ftpy',np.double), # FTP fitter Y position
- ('ftpz',np.double), # FTP fitter Z position
- ('rsp_energy',np.double)] # RSP energy
- )
- df_ev = pd.DataFrame.from_records(array)
+ ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames])
+ fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames])
+
+ # This is a bit of a hack. It appears that many times the fit will
+ # actually do much better by including a very low energy electron or
+ # muon. I believe the reason for this is that of course my likelihood
+ # function is not perfect (for example, I don't include the correct
+ # angular distribution for Rayleigh scattered light), and so the fitter
+ # often wants to add a very low energy electron or muon to fix things.
+ #
+ # Ideally I would fix the likelihood function, but for now we just
+ # discard any fit results which have a very low energy electron or
+ # muon.
+ #
+ # FIXME: Test this since query() is new to pandas
+ fits = fits.query('not (n > 1 and ((id1 == 20 and energy1 < 20) or (id2 == 20 and energy2 < 20) or (id3 == 20 and energy3 < 20)))')
+    fits = fits.query('not (n > 1 and ((id1 == 22 and energy1 < 200) or (id2 == 22 and energy2 < 200) or (id3 == 22 and energy3 < 200)))')
+
+ # Calculate the approximate Ockham factor.
+ # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
+ #
+ # Note: This is a really approximate form by assuming that the shape of
+ # the likelihood space is equal to the average uncertainty in the
+ # different parameters.
+ fits['w'] = fits['n']*np.log(0.1*0.001) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
+
+ # Note: we index on the left hand site with loc to avoid a copy error
+ #
+ # See https://www.dataquest.io/blog/settingwithcopywarning/
+ fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
+ fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
+
+ fits['fmin'] = fits['fmin'] - fits['w']
+
+ fits['psi'] /= fits.merge(ev,on=['run','gtid'])['nhit']
+ fits['ke'] = fits['energy1']
+ fits['id'] = fits['id1'] + fits['id2']*100 + fits['id3']*10000
+ fits['theta'] = fits['theta1']
# Make sure events are in order. We use run number and GTID here which
# should be in time order as well except for bit flips in the GTID
# This is important for later when we look at time differences in the 50
# MHz clock. We should perhaps do a check here based on the 10 MHz clock to
# make sure things are in order
- df_ev = df_ev.sort_values(by=['run','gtid'])
+ ev = ev.sort_values(by=['run','gtid'])
- print("number of events = %i" % len(df_ev))
+ print("number of events = %i" % len(ev))
# First, do basic data cleaning which is done for all events.
- df_ev = df_ev[df_ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK) == 0]
+ ev = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK) == 0]
- print("number of events after data cleaning = %i" % len(df_ev))
+ print("number of events after data cleaning = %i" % len(ev))
# Now, we select events tagged by the muon tag which should tag only
# external muons. We keep the sample of muons since it's needed later to
# identify Michel electrons and to apply the muon follower cut
- muons = df_ev[(df_ev.dc & DC_MUON) != 0]
+ muons = ev[(ev.dc & DC_MUON) != 0]
print("number of muons = %i" % len(muons))
@@ -251,12 +180,12 @@ if __name__ == '__main__':
#
# Should I use 10 MHz clock here since the time isn't critical and then I
# don't have to worry about 50 MHz clock rollover?
- prompt = df_ev[df_ev.nhit >= 100]
+ prompt = ev[ev.nhit >= 100]
prompt_mask = np.concatenate(([True],np.diff(prompt.gtr.values) > 250e6))
# Michel electrons and neutrons can be any event which is not a prompt
# event
- follower = pd.concat([df_ev[df_ev.nhit < 100],prompt[~prompt_mask]])
+ follower = pd.concat([ev[ev.nhit < 100],prompt[~prompt_mask]])
# Apply the prompt mask
prompt = prompt[prompt_mask]
@@ -275,39 +204,78 @@ if __name__ == '__main__':
if prompt_plus_muons.size and follower.size:
# require Michel events to pass more of the SNO data cleaning cuts
michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == DC_FTS]
+
michel = michel[michel.nhit >= 100]
- # accept events which had a muon more than 800 nanoseconds but less than 20 microseconds before them
- # The 800 nanoseconds cut comes from Richie's thesis. He also mentions
- # that the In Time Channel Spread Cut is very effective at cutting
- # electrons events caused by muons, so I should implement that
- michel = michel[np.any((michel.run.values == prompt_plus_muons.run.values[:,np.newaxis]) & \
- (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
- (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+
+ # Accept events which had a muon more than 800 nanoseconds but less
+ # than 20 microseconds before them. The 800 nanoseconds cut comes from
+ # Richie's thesis. He also mentions that the In Time Channel Spread Cut
+    # is very effective at cutting electron events caused by muons, so I
+ # should implement that.
+ #
+ # Note: We currently don't look across run boundaries. This should be a
+ # *very* small effect, and the logic to do so would be very complicated
+ # since I would have to deal with 50 MHz clock rollovers, etc.
+ #
+ # Do a simple python for loop here over the runs since we create
+ # temporary arrays with shape (michel.size,prompt_plus_muons.size)
+ # which could be too big for the full dataset.
+ #
+    # There might be some clever way to do this without the loop in Pandas,
+    # but I don't know how (a possible merge_asof approach is sketched after
+    # this hunk).
+ michel_sum = None
+ for run, michel_run in michel.groupby(['run']):
+ prompt_plus_muons_run = prompt_plus_muons[prompt_plus_muons['run'] == run]
+ michel_run = michel_run[np.any((michel_run.gtr.values > prompt_plus_muons_run.gtr.values[:,np.newaxis] + 800) & \
+ (michel_run.gtr.values < prompt_plus_muons_run.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+
+ if michel_sum is None:
+ michel_sum = michel_run
+ else:
+ michel_sum = michel_sum.append(michel_run)
+
+ michel = michel_sum
else:
michel = pd.DataFrame(columns=follower.columns)
if prompt.size and follower.size:
# neutron followers have to obey stricter set of data cleaning cuts
neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == DC_FTS]
- neutron = neutron[~np.isnan(neutron.ftpx) & ~np.isnan(neutron.rsp_energy)]
- r = np.sqrt(neutron.ftpx**2 + neutron.ftpy**2 + neutron.ftpz**2)
+ neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
+ r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2)
neutron = neutron[r < AV_RADIUS]
neutron = neutron[neutron.rsp_energy > 4.0]
+
# neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
# FIXME: need to deal with 50 MHz clock rollover
- neutron_mask = np.any((neutron.run.values == prompt.run.values[:,np.newaxis]) & \
- (neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
- (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
- df_atm = prompt[neutron_mask]
- prompt = prompt[~neutron_mask]
+ prompt_sum = None
+ atm_sum = None
+ for run, prompt_run in prompt.groupby(['run']):
+ neutron_run = neutron[neutron['run'] == run]
+
+ neutron_mask = np.any((neutron_run.gtr.values > prompt_run.gtr.values[:,np.newaxis] + 20e3) & \
+ (neutron_run.gtr.values < prompt_run.gtr.values[:,np.newaxis] + 250e6),axis=1)
+
+ if prompt_sum is None:
+ prompt_sum = prompt_run[~neutron_mask]
+ else:
+ prompt_sum = prompt_sum.append(prompt_run[~neutron_mask])
+
+ if atm_sum is None:
+ atm_sum = prompt_run[neutron_mask]
+ else:
+ atm_sum = atm_sum.append(prompt_run[neutron_mask])
+
+ atm = atm_sum
+ prompt = prompt_sum
else:
- df_atm = pd.DataFrame(columns=prompt.columns)
+ atm = pd.DataFrame(columns=prompt.columns)
print("number of events after neutron follower cut = %i" % len(prompt))
# Get rid of muon events in our main event sample
prompt = prompt[(prompt.dc & DC_MUON) == 0]
- df_atm = df_atm[(df_atm.dc & DC_MUON) == 0]
+ atm = atm[(atm.dc & DC_MUON) == 0]
print("number of events after muon cut = %i" % len(prompt))
@@ -317,52 +285,52 @@ if __name__ == '__main__':
prompt = prompt[~np.any((prompt.run.values == muons.run.values[:,np.newaxis]) & \
(prompt.gtr.values > muons.gtr.values[:,np.newaxis]) & \
(prompt.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)]
- df_atm = df_atm[~np.any((df_atm.run.values == muons.run.values[:,np.newaxis]) & \
- (df_atm.gtr.values > muons.gtr.values[:,np.newaxis]) & \
- (df_atm.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)]
+ atm = atm[~np.any((atm.run.values == muons.run.values[:,np.newaxis]) & \
+ (atm.gtr.values > muons.gtr.values[:,np.newaxis]) & \
+ (atm.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)]
# Check to see if there are any events with missing fit information
- df_atm_ra = df_atm[['run','gtid']].to_records(index=False)
+ atm_ra = atm[['run','gtid']].to_records(index=False)
muons_ra = muons[['run','gtid']].to_records(index=False)
prompt_ra = prompt[['run','gtid']].to_records(index=False)
michel_ra = michel[['run','gtid']].to_records(index=False)
- df_ra = df[['run','gtid']].to_records(index=False)
+ fits_ra = fits[['run','gtid']].to_records(index=False)
- if np.count_nonzero(~np.isin(df_atm_ra,df_ra)):
- print_warning("skipping %i atmospheric events because they are missing fit information!" % np.count_nonzero(~np.isin(df_atm_ra,df_ra)))
+ if len(atm_ra) and np.count_nonzero(~np.isin(atm_ra,fits_ra)):
+ print_warning("skipping %i atmospheric events because they are missing fit information!" % np.count_nonzero(~np.isin(atm_ra,fits_ra)))
- if np.count_nonzero(~np.isin(muons_ra,df_ra)):
- print_warning("skipping %i muon events because they are missing fit information!" % np.count_nonzero(~np.isin(muons_ra,df_ra)))
+ if len(muons_ra) and np.count_nonzero(~np.isin(muons_ra,fits_ra)):
+ print_warning("skipping %i muon events because they are missing fit information!" % np.count_nonzero(~np.isin(muons_ra,fits_ra)))
- if np.count_nonzero(~np.isin(prompt_ra,df_ra)):
- print_warning("skipping %i signal events because they are missing fit information!" % np.count_nonzero(~np.isin(prompt_ra,df_ra)))
+ if len(prompt_ra) and np.count_nonzero(~np.isin(prompt_ra,fits_ra)):
+ print_warning("skipping %i signal events because they are missing fit information!" % np.count_nonzero(~np.isin(prompt_ra,fits_ra)))
- if np.count_nonzero(~np.isin(michel_ra,df_ra)):
- print_warning("skipping %i Michel events because they are missing fit information!" % np.count_nonzero(~np.isin(michel_ra,df_ra)))
+ if len(michel_ra) and np.count_nonzero(~np.isin(michel_ra,fits_ra)):
+ print_warning("skipping %i Michel events because they are missing fit information!" % np.count_nonzero(~np.isin(michel_ra,fits_ra)))
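The checks above depend on np.isin treating each (run, gtid) record as a single comparable value, which is why the columns are converted with to_records() first. A toy demonstration:

import numpy as np
import pandas as pd

a = pd.DataFrame({'run': [1, 1], 'gtid': [10, 11]}).to_records(index=False)
b = pd.DataFrame({'run': [1], 'gtid': [10]}).to_records(index=False)
print(~np.isin(a, b))   # [False  True]: (1, 11) has no match in b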
# Now, we merge the event info with the fitter info.
#
# Note: This means that the dataframe now contains multiple rows for each
# event, one for each fit hypothesis.
- df_atm = pd.merge(df,df_atm,how='inner',on=['run','gtid'])
- muons = pd.merge(df,muons,how='inner',on=['run','gtid'])
- michel = pd.merge(df,michel,how='inner',on=['run','gtid'])
- df = pd.merge(df,prompt,how='inner',on=['run','gtid'])
+ atm = pd.merge(fits,atm,how='inner',on=['run','gtid'])
+ muons = pd.merge(fits,muons,how='inner',on=['run','gtid'])
+ michel = pd.merge(fits,michel,how='inner',on=['run','gtid'])
+ prompt = pd.merge(fits,prompt,how='inner',on=['run','gtid'])
# get rid of events which don't have a fit
- nan = np.isnan(df.fmin.values)
+ nan = np.isnan(prompt.fmin.values)
if np.count_nonzero(nan):
- print_warning("skipping %i signal events because the negative log likelihood is nan!" % len(df[nan].groupby(['run','gtid'])))
+ print_warning("skipping %i signal events because the negative log likelihood is nan!" % len(prompt[nan].groupby(['run','gtid'])))
- df = df[~nan]
+ prompt = prompt[~nan]
- nan_atm = np.isnan(df_atm.fmin.values)
+ nan_atm = np.isnan(atm.fmin.values)
if np.count_nonzero(nan_atm):
- print_warning("skipping %i atmospheric events because the negative log likelihood is nan!" % len(df_atm[nan_atm].groupby(['run','gtid'])))
+ print_warning("skipping %i atmospheric events because the negative log likelihood is nan!" % len(atm[nan_atm].groupby(['run','gtid'])))
- df_atm = df_atm[~nan_atm]
+ atm = atm[~nan_atm]
nan_muon = np.isnan(muons.fmin.values)
@@ -379,24 +347,24 @@ if __name__ == '__main__':
michel = michel[~nan_michel]
# get the best fit
- df = df.sort_values('fmin').groupby(['run','gtid']).first()
- df_atm = df_atm.sort_values('fmin').groupby(['run','gtid']).first()
+ prompt = prompt.sort_values('fmin').groupby(['run','gtid']).first()
+ atm = atm.sort_values('fmin').groupby(['run','gtid']).first()
michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).first()
muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).first()
muons = muons[muons.id == 22]
# require r < 6 meters
- df = df[np.sqrt(df.x.values**2 + df.y.values**2 + df.z.values**2) < AV_RADIUS]
- df_atm = df_atm[np.sqrt(df_atm.x.values**2 + df_atm.y.values**2 + df_atm.z.values**2) < AV_RADIUS]
+ prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS]
+ atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS]
- print("number of events after radius cut = %i" % len(df))
+ print("number of events after radius cut = %i" % len(prompt))
# Note: Need to design and apply a psi based cut here
plt.figure()
- plot_hist(df,"Without Neutron Follower")
+ plot_hist(prompt,"Without Neutron Follower")
plt.figure()
- plot_hist(df_atm,"With Neutron Follower")
+ plot_hist(atm,"With Neutron Follower")
plt.figure()
plot_hist(michel_best_fit,"Michel Electrons")
plt.figure()
diff --git a/utils/plot-fit-results b/utils/plot-fit-results
index 773a0dc..7115b81 100755
--- a/utils/plot-fit-results
+++ b/utils/plot-fit-results
@@ -15,11 +15,6 @@
# this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function, division
-import yaml
-try:
- from yaml import CLoader as Loader
-except ImportError:
- from yaml.loader import SafeLoader as Loader
import numpy as np
from scipy.stats import iqr
from matplotlib.lines import Line2D
@@ -71,228 +66,137 @@ def get_stats(x):
error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
return mean, std/np.sqrt(n), std, error
+def std_err(x):
+    x = x.dropna()
+    n = len(x)
+    if n < 2:
+        return np.nan
+    mean = np.mean(x)
+    std = np.std(x)
+    u4 = np.mean((x-mean)**4)
+    error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
+    return error
+
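For reference, the error that std_err (and the last value returned by get_stats) computes is the delta-method standard error of the sample standard deviation. With \mu_4 = \frac{1}{n}\sum_i (x_i - \bar{x})^4 the fourth central moment,

\operatorname{Var}(s^2) \approx \frac{1}{n}\left(\mu_4 - \frac{n-3}{n-1}\,s^4\right),
\qquad
\operatorname{SE}(s) \approx \frac{\sqrt{\operatorname{Var}(s^2)}}{2s},

which is exactly np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) in the code.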
if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
import numpy as np
+ import h5py
+ import pandas as pd
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
args = parser.parse_args()
- events = []
-
- for filename in args.filenames:
- print(filename)
- with open(filename) as f:
- data = yaml.load(f.read(),Loader=Loader)
-
- a = np.ma.empty(len(data['data']),
- dtype=[('id',np.int), # particle id
- ('T', np.double), # true energy
- ('dx',np.double), # dx
- ('dy',np.double), # dy
- ('dz',np.double), # dz
- ('dT',np.double), # dT
- ('theta',np.double), # theta
- ('ratio',np.double), # likelihood ratio
- ('te',np.double), # time electron
- ('tm',np.double), # time muon
- ('Te',np.double)] # electron energy
- )
-
- for i, event in enumerate(data['data']):
- # get the particle ID
- id = event['mcgn'][0]['id']
-
- a[i]['id'] = id
-
- if 'fit' not in event['ev'][0]:
- # if nhit < 100 we don't fit the event
- continue
-
- if id not in event['ev'][0]['fit']:
- a[i] = np.ma.masked
- continue
-
- mass = SNOMAN_MASS[id]
- # for some reason it's the *second* track which seems to contain the
- # initial energy
- true_energy = event['mcgn'][0]['energy']
- # The MCTK bank has the particle's total energy (except for neutrons)
- # so we need to convert it into kinetic energy
- ke = true_energy - mass
-
- fit = event['ev'][0]['fit']
-
- a[i]['T'] = ke
- energy = fit[id]['energy']
- a[i]['dT'] = energy-ke
-
- # store the fit position residuals
- true_posx = event['mcgn'][0]['posx']
- posx = fit[id]['posx']
- a[i]['dx'] = posx-true_posx
- true_posy = event['mcgn'][0]['posy']
- posy = fit[id]['posy']
- a[i]['dy'] = posy-true_posy
- true_posz = event['mcgn'][0]['posz']
- posz = fit[id]['posz']
- a[i]['dz'] = posz-true_posz
-
- # compute the angle between the fit direction and the true
- # direction
- dirx = event['mcgn'][0]['dirx']
- diry = event['mcgn'][0]['diry']
- dirz = event['mcgn'][0]['dirz']
- true_dir = [dirx,diry,dirz]
- true_dir = np.array(true_dir)/np.linalg.norm(true_dir)
- theta = fit[id]['theta']
- phi = fit[id]['phi']
- dir = [np.sin(theta)*np.cos(phi),np.sin(theta)*np.sin(phi),np.cos(theta)]
- dir = np.array(dir)/np.linalg.norm(dir)
- a[i]['theta'] = np.degrees(np.arccos(np.dot(true_dir,dir)))
-
- # compute the log likelihood ratio
- if IDP_E_MINUS in fit and IDP_MU_MINUS in fit:
- fmin_electron = fit[IDP_E_MINUS]['fmin']
- fmin_muon = fit[IDP_MU_MINUS]['fmin']
- a[i]['ratio'] = fmin_muon-fmin_electron
- else:
- a[i]['ratio'] = np.ma.masked
-
- # store the time taken for electron and muon fits
- if IDP_E_MINUS in fit:
- a[i]['te'] = fit[IDP_E_MINUS]['time']
- a[i]['Te'] = fit[IDP_E_MINUS]['energy']
- else:
- a[i]['te'] = np.ma.masked
- a[i]['Te'] = np.ma.masked
- if IDP_MU_MINUS in fit:
- a[i]['tm'] = fit[IDP_MU_MINUS]['time']
- else:
- a[i]['tm'] = np.ma.masked
-
- events.append(a)
-
- a = np.ma.concatenate(events)
-
+ # Read in all the data.
+ #
+ # Note: We have to add the filename as a column to each dataset since this
+ # script is used to analyze MC data which all has the same run number.
+ ev = pd.concat([pd.read_hdf(filename, "ev").assign(filename=filename) for filename in args.filenames])
+ fits = pd.concat([pd.read_hdf(filename, "fits").assign(filename=filename) for filename in args.filenames])
+ mcgn = pd.concat([pd.read_hdf(filename, "mcgn").assign(filename=filename) for filename in args.filenames])
+
+ # get rid of 2nd events like Michel electrons
+ ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).first()
+
+ # Now, we merge all three datasets together to produce a single
+ # dataframe. To do so, we join the ev dataframe with the mcgn frame
+ # on the evn column, and then join with the fits on the run and
+ # gtid columns.
+ #
+ # At the end we will have a single dataframe with one row for each
+ # fit, i.e. it will look like:
+ #
+ # >>> data
+    #     run  gtid  nhit ... mcgn_x mcgn_y mcgn_z ... fit_id1 fit_x fit_y fit_z ...
+ #
+ # Before merging, we prefix the primary seed track table with mcgn_
+ # and the fit table with fit_ just to make things easier.
+
+ # Prefix track and fit frames
+ mcgn = mcgn.add_prefix("mcgn_")
+ fits = fits.add_prefix("fit_")
+
+ # merge ev and mcgn on evn
+ data = ev.merge(mcgn,left_on=['filename','evn'],right_on=['mcgn_filename','mcgn_evn'])
+ # merge data and fits on run and gtid
+ data = data.merge(fits,left_on=['filename','run','gtid'],right_on=['fit_filename','fit_run','fit_gtid'])
+
+ # calculate true kinetic energy
+ mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values]
+ data['T'] = data['mcgn_energy'].values - mass
+ data['dx'] = data['fit_x'].values - data['mcgn_x'].values
+ data['dy'] = data['fit_y'].values - data['mcgn_y'].values
+ data['dz'] = data['fit_z'].values - data['mcgn_z'].values
+ data['dT'] = data['fit_energy1'].values - data['T'].values
+
+ true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze()
+ dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']),
+ np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']),
+ np.cos(data['fit_theta1']))).squeeze()
+
+ data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1)))
+
+    # only keep events which have at least 2 fits
+ data = data.groupby(['filename','run','gtid']).filter(lambda x: len(x) > 1)
+ data_true = data[data['fit_id1'] == data['mcgn_id']]
+ data_e = data[data['fit_id1'] == IDP_E_MINUS]
+ data_mu = data[data['fit_id1'] == IDP_MU_MINUS]
+
+ data_true = data_true.set_index(['filename','run','gtid'])
+ data_e = data_e.set_index(['filename','run','gtid'])
+ data_mu = data_mu.set_index(['filename','run','gtid'])
+
+ data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin']
+ data_true['te'] = data_e['fit_time']
+ data_true['tm'] = data_mu['fit_time']
+ data_true['Te'] = data_e['fit_energy1']
+
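The ratio/te/tm/Te assignments above rely on data_e and data_mu sharing the (filename, run, gtid) index: pandas aligns the rows automatically and fills NaN where one hypothesis is missing. A toy example of that alignment:

import pandas as pd

e = pd.Series([5.0, 7.0], index=[('f', 1, 10), ('f', 1, 11)])
mu = pd.Series([6.0], index=[('f', 1, 10)])
print(mu - e)   # ('f', 1, 10) -> 1.0, ('f', 1, 11) -> NaN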
+    # 9 bins of width 100 MeV between 50 MeV and 950 MeV
bins = np.arange(50,1000,100)
- stats_array = np.ma.empty(len(bins)-1,
- dtype=[('T', np.double),
- ('dT', np.double),
- ('dT_err', np.double),
- ('dT_std', np.double),
- ('dT_std_err', np.double),
- ('dx', np.double),
- ('dx_err', np.double),
- ('dx_std', np.double),
- ('dx_std_err', np.double),
- ('dy', np.double),
- ('dy_err', np.double),
- ('dy_std', np.double),
- ('dy_std_err', np.double),
- ('dz', np.double),
- ('dz_err', np.double),
- ('dz_std', np.double),
- ('dz_std_err', np.double),
- ('theta', np.double),
- ('theta_err', np.double),
- ('theta_std', np.double),
- ('theta_std_err', np.double)])
-
- stats = {IDP_E_MINUS: stats_array, IDP_MU_MINUS: stats_array.copy()}
-
- for id in stats:
- electron_events = a[a['id'] == id]
-
- for i, (ablah, b) in enumerate(zip(bins[:-1], bins[1:])):
- events = electron_events[(electron_events['T'] >= ablah) & (electron_events['T'] < b)]
-
- if len(events) < 2:
- stats[id][i] = np.ma.masked
- continue
-
- stats[id][i]['T'] = (ablah+b)/2
- mean, mean_error, std, std_error = get_stats(events['dT'].compressed())
- stats[id][i]['dT'] = mean
- stats[id][i]['dT_err'] = mean_error
- stats[id][i]['dT_std'] = std
- stats[id][i]['dT_std_err'] = std_error
- mean, mean_error, std, std_error = get_stats(events['dx'].compressed())
- stats[id][i]['dx'] = mean
- stats[id][i]['dx_err'] = mean_error
- stats[id][i]['dx_std'] = std
- stats[id][i]['dx_std_err'] = std_error
- mean, mean_error, std, std_error = get_stats(events['dy'].compressed())
- stats[id][i]['dy'] = mean
- stats[id][i]['dy_err'] = mean_error
- stats[id][i]['dy_std'] = std
- stats[id][i]['dy_std_err'] = std_error
- mean, mean_error, std, std_error = get_stats(events['dz'].compressed())
- stats[id][i]['dz'] = mean
- stats[id][i]['dz_err'] = mean_error
- stats[id][i]['dz_std'] = std
- stats[id][i]['dz_std_err'] = std_error
- mean, mean_error, std, std_error = get_stats(events['theta'].compressed())
- stats[id][i]['theta'] = mean
- stats[id][i]['theta_err'] = mean_error
- stats[id][i]['theta_std'] = std
- stats[id][i]['theta_std_err'] = std_error
-
- for id in stats:
+ for id in [IDP_E_MINUS, IDP_MU_MINUS]:
+ events = data_true[data_true['mcgn_id'] == id]
+
+ pd_bins = pd.cut(events['T'],bins)
+
+ dT = events.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err])
+ dx = events.groupby(pd_bins)['dx'].agg(['mean','sem','std',std_err])
+ dy = events.groupby(pd_bins)['dy'].agg(['mean','sem','std',std_err])
+ dz = events.groupby(pd_bins)['dz'].agg(['mean','sem','std',std_err])
+ theta = events.groupby(pd_bins)['theta'].agg(['mean','sem','std',std_err])
+
label = 'Muon' if id == IDP_MU_MINUS else 'Electron'
- T = stats[id]['T']
- dT = stats[id]['dT']
- dT_err = stats[id]['dT_err']
- std_dT = stats[id]['dT_std']
- std_dT_err = stats[id]['dT_std_err']
- dx = stats[id]['dx']
- dx_err = stats[id]['dx_err']
- std_dx = stats[id]['dx_std']
- std_dx_err = stats[id]['dx_std_err']
- dy = stats[id]['dy']
- dy_err = stats[id]['dy_err']
- std_dy = stats[id]['dy_std']
- std_dy_err = stats[id]['dy_std_err']
- dz = stats[id]['dz']
- dz_err = stats[id]['dz_err']
- std_dz = stats[id]['dz_std']
- std_dz_err = stats[id]['dz_std_err']
- theta = stats[id]['theta']
- theta_err = stats[id]['theta_err']
- std_theta = stats[id]['theta_std']
- std_theta_err = stats[id]['theta_std_err']
+ T = (bins[1:] + bins[:-1])/2
plt.figure(1)
- plt.errorbar(T,dT*100/T,yerr=dT_err*100/T,fmt='o',label=label)
+ plt.errorbar(T,dT['mean']*100/T,yerr=dT['sem']*100/T,fmt='o',label=label)
plt.xlabel("Kinetic Energy (MeV)")
plt.ylabel("Energy bias (%)")
plt.title("Energy Bias")
plt.legend()
plt.figure(2)
- plt.errorbar(T,std_dT*100/T,yerr=std_dT_err*100/T,fmt='o',label=label)
+ plt.errorbar(T,dT['std']*100/T,yerr=dT['std_err']*100/T,fmt='o',label=label)
plt.xlabel("Kinetic Energy (MeV)")
plt.ylabel("Energy resolution (%)")
plt.title("Energy Resolution")
plt.legend()
plt.figure(3)
- plt.errorbar(T,dx,yerr=dx_err,fmt='o',label='%s (x)' % label)
- plt.errorbar(T,dy,yerr=dy_err,fmt='o',label='%s (y)' % label)
- plt.errorbar(T,dz,yerr=dz_err,fmt='o',label='%s (z)' % label)
+ plt.errorbar(T,dx['mean'],yerr=dx['sem'],fmt='o',label='%s (x)' % label)
+ plt.errorbar(T,dy['mean'],yerr=dy['sem'],fmt='o',label='%s (y)' % label)
+ plt.errorbar(T,dz['mean'],yerr=dz['sem'],fmt='o',label='%s (z)' % label)
plt.xlabel("Kinetic Energy (MeV)")
plt.ylabel("Position bias (cm)")
plt.title("Position Bias")
plt.legend()
plt.figure(4)
- plt.errorbar(T,std_dx,yerr=std_dx_err,fmt='o',label='%s (x)' % label)
- plt.errorbar(T,std_dy,yerr=std_dy_err,fmt='o',label='%s (y)' % label)
- plt.errorbar(T,std_dz,yerr=std_dz_err,fmt='o',label='%s (z)' % label)
+ plt.errorbar(T,dx['std'],yerr=dx['std_err'],fmt='o',label='%s (x)' % label)
+ plt.errorbar(T,dy['std'],yerr=dy['std_err'],fmt='o',label='%s (y)' % label)
+ plt.errorbar(T,dz['std'],yerr=dz['std_err'],fmt='o',label='%s (z)' % label)
plt.xlabel("Kinetic Energy (MeV)")
plt.ylabel("Position resolution (cm)")
plt.title("Position Resolution")
@@ -300,7 +204,7 @@ if __name__ == '__main__':
plt.legend()
plt.figure(5)
- plt.errorbar(T,std_theta,yerr=std_theta_err,fmt='o',label=label)
+ plt.errorbar(T,theta['std'],yerr=theta['std_err'],fmt='o',label=label)
plt.xlabel("Kinetic Energy (MeV)")
plt.ylabel("Angular resolution (deg)")
plt.title("Angular Resolution")
@@ -308,9 +212,10 @@ if __name__ == '__main__':
plt.legend()
plt.figure(6)
- plt.scatter(a[a['id'] == id]['Te'],a[a['id'] == id]['ratio'],label=label)
+ plt.scatter(events['Te'],events['ratio'],label=label)
plt.xlabel("Reconstructed Electron Energy (MeV)")
plt.ylabel(r"Log Likelihood Ratio (e/$\mu$)")
plt.title("Log Likelihood Ratio vs Reconstructed Electron Energy")
plt.legend()
+
plt.show()
diff --git a/utils/submit-grid-jobs b/utils/submit-grid-jobs
index 670172d..63cbcb7 100755
--- a/utils/submit-grid-jobs
+++ b/utils/submit-grid-jobs
@@ -24,6 +24,83 @@ import string
from os.path import split, splitext, join, abspath
import uuid
from subprocess import check_call
+import os
+import sys
+
+# The next two functions are a backport of the shutil.which() function from
+# Python 3.3, taken from Lib/shutil.py in the CPython source. See
+# https://github.com/python/cpython/blob/master/Lib/shutil.py.
+
+# Check that a given file can be accessed with the correct mode.
+# Additionally check that `fn` is not a directory, as on Windows
+# directories pass the os.access check.
+def _access_check(fn, mode):
+ return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn))
+
+def which(cmd, mode=os.F_OK | os.X_OK, path=None):
+ """Given a command, mode, and a PATH string, return the path which
+ conforms to the given mode on the PATH, or None if there is no such
+ file.
+ `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
+ of os.environ.get("PATH"), or can be overridden with a custom search
+ path.
+ """
+ # If we're given a path with a directory part, look it up directly rather
+ # than referring to PATH directories. This includes checking relative to the
+ # current directory, e.g. ./script
+ if os.path.dirname(cmd):
+ if _access_check(cmd, mode):
+ return cmd
+ return None
+
+ if path is None:
+ path = os.environ.get("PATH", None)
+ if path is None:
+ try:
+ path = os.confstr("CS_PATH")
+ except (AttributeError, ValueError):
+ # os.confstr() or CS_PATH is not available
+ path = os.defpath
+ # bpo-35755: Don't use os.defpath if the PATH environment variable is
+ # set to an empty string
+
+ # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+ if not path:
+ return None
+
+ path = path.split(os.pathsep)
+
+ if sys.platform == "win32":
+ # The current directory takes precedence on Windows.
+ curdir = os.curdir
+ if curdir not in path:
+ path.insert(0, curdir)
+
+ # PATHEXT is necessary to check on Windows.
+ pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
+ # See if the given file matches any of the expected path extensions.
+ # This will allow us to short circuit when given "python.exe".
+ # If it does match, only test that one, otherwise we have to try
+ # others.
+ if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
+ files = [cmd]
+ else:
+ files = [cmd + ext for ext in pathext]
+ else:
+ # On other platforms you don't have things like PATHEXT to tell you
+ # what file suffixes are executable, so just pass on cmd as-is.
+ files = [cmd]
+
+ seen = set()
+ for dir in path:
+ normdir = os.path.normcase(dir)
+        if normdir not in seen:
+ seen.add(normdir)
+ for thefile in files:
+ name = os.path.join(dir, thefile)
+ if _access_check(name, mode):
+ return name
+ return None
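As with shutil.which() in Python 3.3+, a command with a directory part is checked directly and everything else is searched on PATH; for example (paths illustrative):

print(which("fit"))     # e.g. '/home/user/local/bin/fit', or None if not found
print(which("./fit"))   # has a directory part, so PATH is not consulted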
CONDOR_TEMPLATE = \
"""
@@ -42,7 +119,7 @@ log = @log
# The below are good base requirements for first testing jobs on OSG,
# if you don't have a good idea of memory and disk usage.
-requirements = (OSGVO_OS_STRING == "RHEL 7") && (OpSys == "LINUX")
+requirements = (HAS_MODULES =?= true) && (OSGVO_OS_STRING == "RHEL 7") && (OpSys == "LINUX")
request_cpus = 1
request_memory = 1 GB
request_disk = 1 GB
@@ -72,14 +149,24 @@ def submit_job(filename, run, gtid, dir, dqxx_dir, min_nhit, max_particles):
prefix = "%s_%08i_%s" % (root,gtid,ID.hex)
# fit output filename
- output = "%s.txt" % prefix
+ output = "%s.hdf5" % prefix
# condor submit filename
condor_submit = "%s.submit" % prefix
# set up the arguments for the template
- executable = join(dir,"fit")
+ executable = which("fit")
+ wrapper = which("fit-wrapper")
+
+ if executable is None:
+ print("couldn't find fit in path!",file=sys.stderr)
+ sys.exit(1)
+
+ if wrapper is None:
+ print("couldn't find fit-wrapper in path!",file=sys.stderr)
+ sys.exit(1)
+
args = [tail,"-o",output,"--gtid",gtid,"--min-nhit",min_nhit,"--max-particles",max_particles]
- transfer_input_files = ",".join([filename,join(dqxx_dir,"DQXX_%010i.dat" % run)] + [join(dir,filename) for filename in INPUT_FILES])
+ transfer_input_files = ",".join([executable,filename,join(dqxx_dir,"DQXX_%010i.dat" % run)] + [join(dir,filename) for filename in INPUT_FILES])
transfer_output_files = ",".join([output])
condor_error = "%s.error" % prefix
@@ -89,7 +176,7 @@ def submit_job(filename, run, gtid, dir, dqxx_dir, min_nhit, max_particles):
template = MyTemplate(CONDOR_TEMPLATE)
submit_string = template.safe_substitute(
- executable=executable,
+ executable=wrapper,
args=" ".join(map(str,args)),
transfer_input_files=transfer_input_files,
transfer_output_files=transfer_output_files,
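MyTemplate is defined earlier in this script, outside this diff; for safe_substitute() to fill the @-prefixed placeholders in CONDOR_TEMPLATE it is presumably something along these lines (an assumption, not shown in the patch):

import string

class MyTemplate(string.Template):
    # '@' instead of '$' keeps condor's own $(...) macros out of harm's way
    delimiter = '@'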
@@ -106,13 +193,13 @@ def submit_job(filename, run, gtid, dir, dqxx_dir, min_nhit, max_particles):
if __name__ == '__main__':
import argparse
- import subprocess
+ from subprocess import check_call
import os
+ import tempfile
+ import h5py
parser = argparse.ArgumentParser("submit grid jobs")
parser.add_argument("filenames", nargs='+', help="input files")
- parser.add_argument("--dir", type=str, help="fitter directory", required=True)
- parser.add_argument("--dqxx-dir", type=str, help="dqxx directory", required=True)
parser.add_argument("--min-nhit", type=int, help="minimum nhit to fit an event", default=100)
parser.add_argument("--max-particles", type=int, help="maximum number of particles to fit for", default=3)
parser.add_argument("--skip-second-event", action='store_true', help="only fit the first event after a MAST bank", default=False)
@@ -123,14 +210,32 @@ if __name__ == '__main__':
home = os.path.expanduser("~")
args.state = join(home,'state.txt')
+ if 'SDDM_DATA' not in os.environ:
+ print("Please set the SDDM_DATA environment variable to point to the fitter source code location", file=sys.stderr)
+ sys.exit(1)
+
+ dir = os.environ['SDDM_DATA']
+
+ if 'DQXX_DIR' not in os.environ:
+ print("Please set the DQXX_DIR environment variable to point to the directory with the DQXX files", file=sys.stderr)
+ sys.exit(1)
+
+ dqxx_dir = os.environ['DQXX_DIR']
+
args.state = abspath(args.state)
# get the current working directory
home_dir = os.getcwd()
# get absolute paths since we are going to create a new directory
- args.dir = abspath(args.dir)
- args.dqxx_dir = abspath(args.dqxx_dir)
+ dir = abspath(dir)
+ dqxx_dir = abspath(dqxx_dir)
+
+ zdab_cat = which("zdab-cat")
+
+ if zdab_cat is None:
+ print("couldn't find zdab-cat in path!",file=sys.stderr)
+ sys.exit(1)
for filename in args.filenames:
filename = abspath(filename)
@@ -152,29 +257,28 @@ if __name__ == '__main__':
continue
with open(os.devnull, 'w') as f:
+            # Create a temporary file to store the events. The docs
+            # recommend NamedTemporaryFile() over mkstemp(), but for our
+            # purposes they should be equivalent.
+ output = tempfile.NamedTemporaryFile(suffix='.hdf5',delete=False)
+ output.close()
+
if args.skip_second_event:
- popen = subprocess.Popen([join(args.dir,"zdab-cat"),"--skip-second-event",filename],stdout=subprocess.PIPE,stderr=f)
+ check_call([zdab_cat,"--skip-second-event",filename,"-o",output.name],stderr=f)
else:
- popen = subprocess.Popen([join(args.dir,"zdab-cat"),filename],stdout=subprocess.PIPE,stderr=f)
+ check_call([zdab_cat,filename,"-o",output.name],stderr=f)
new_dir = "%s_%s" % (root,ID.hex)
os.mkdir(new_dir)
os.chdir(new_dir)
- for data in yaml.load_all(popen.stdout,Loader=Loader):
- if 'ev' not in data:
- continue
-
- for ev in data['ev']:
- run = ev['run']
- gtid = ev['gtid']
- nhit = ev['nhit']
-
- if nhit >= args.min_nhit:
- submit_job(filename, run, gtid, args.dir, args.dqxx_dir, args.min_nhit, args.max_particles)
+        # open read-only; use a name other than f to avoid shadowing the
+        # devnull handle above
+        with h5py.File(output.name, 'r') as hdf:
+            for ev in hdf['ev']:
+                if ev['nhit'] >= args.min_nhit:
+                    submit_job(filename, ev['run'], ev['gtid'], dir, dqxx_dir, args.min_nhit, args.max_particles)
- popen.wait()
+ # Delete temporary HDF5 file
+ os.unlink(output.name)
state.append(tail)