about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2021-01-03 11:40:47 -0600
committertlatorre <tlatorre@uchicago.edu>2021-01-03 11:40:47 -0600
commit6fc97e22fa2a8cc48a96b2ce22c4d55a8fd2b80f (patch)
treef5bf72de18002e0d126b8e37c628f68ec3926fb6
parentc71532924c8c2be07026041b7cd62819e90e8c64 (diff)
downloadsddm-6fc97e22fa2a8cc48a96b2ce22c4d55a8fd2b80f.tar.gz
sddm-6fc97e22fa2a8cc48a96b2ce22c4d55a8fd2b80f.tar.bz2
sddm-6fc97e22fa2a8cc48a96b2ce22c4d55a8fd2b80f.zip
cache results from get_events()
-rwxr-xr-xutils/chi28
-rwxr-xr-xutils/dm-search8
-rwxr-xr-xutils/plot-michels3
-rwxr-xr-xutils/plot-muons5
-rw-r--r--utils/sddm/__init__.py15
-rw-r--r--utils/sddm/plot_energy.py33
6 files changed, 48 insertions, 24 deletions
diff --git a/utils/chi2 b/utils/chi2
index c883deb..f4d94c4 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -552,7 +552,7 @@ if __name__ == '__main__':
# Add the "flux_weight" column to the ev_mc data since I stupidly simulated
# the muon neutrino flux for the tau neutrino flux in GENIE. Doh!
mcpl = load_mcpl_files(args.mcpl)
- ev_mc = renormalize_data(ev_mc.reset_index(),mcpl)
+ ev_mc = renormalize_data(ev_mc,mcpl)
# Merge weights with MCPL dataframe to get the unique id column in the
# weights dataframe since that is what we use to merge with the Monte
@@ -591,11 +591,7 @@ if __name__ == '__main__':
muon_mc = correct_energy_bias(muon_mc)
# Set all prompt events in the MC to be muons
- muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True
-
- ev = ev.reset_index()
- ev_mc = ev_mc.reset_index()
- muon_mc = muon_mc.reset_index()
+ muon_mc.loc[muon_mc.prompt ,'muon'] = True
# 00-orphan cut
ev = ev[(ev.gtid & 0xff) != 0]
diff --git a/utils/dm-search b/utils/dm-search
index 8e2473c..cd91eab 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -616,7 +616,7 @@ if __name__ == '__main__':
# Add the "flux_weight" column to the ev_mc data since I stupidly simulated
# the muon neutrino flux for the tau neutrino flux in GENIE. Doh!
mcpl = load_mcpl_files(args.mcpl)
- ev_mc = renormalize_data(ev_mc.reset_index(),mcpl)
+ ev_mc = renormalize_data(ev_mc,mcpl)
# Merge weights with MCPL dataframe to get the unique id column in the
# weights dataframe since that is what we use to merge with the Monte
@@ -653,11 +653,7 @@ if __name__ == '__main__':
muon_mc = correct_energy_bias(muon_mc)
# Set all prompt events in the MC to be muons
- muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True
-
- ev = ev.reset_index()
- ev_mc = ev_mc.reset_index()
- muon_mc = muon_mc.reset_index()
+ muon_mc.loc[muon_mc.prompt,'muon'] = True
# 00-orphan cut
ev = ev[(ev.gtid & 0xff) != 0]
diff --git a/utils/plot-michels b/utils/plot-michels
index dbd6fd5..e0e4dd7 100755
--- a/utils/plot-michels
+++ b/utils/plot-michels
@@ -131,9 +131,6 @@ if __name__ == '__main__':
ev = ev.reset_index()
ev_mc = ev_mc.reset_index()
- # Set all prompt events in the MC to be muons
- ev_mc.loc[ev_mc.prompt & ev_mc.filename.str.contains("cosmic"),'muon'] = True
-
# First, do basic data cleaning which is done for all events.
ev = ev[ev.signal | ev.muon]
ev_mc = ev_mc[ev_mc.signal | ev_mc.muon]
diff --git a/utils/plot-muons b/utils/plot-muons
index 2616ad2..a37f223 100755
--- a/utils/plot-muons
+++ b/utils/plot-muons
@@ -102,11 +102,8 @@ if __name__ == '__main__':
ev = ev[~np.isnan(ev.fmin)]
ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
- ev = ev.reset_index()
- ev_mc = ev_mc.reset_index()
-
# Set all prompt events in the MC to be muons
- ev_mc.loc[ev_mc.prompt & ev_mc.filename.str.contains("cosmic"),'muon'] = True
+ ev_mc.loc[ev_mc.prompt,'muon'] = True
# First, do basic data cleaning which is done for all events.
ev = ev[ev.signal | ev.muon]
diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py
index c6f2ddc..fb0fd69 100644
--- a/utils/sddm/__init__.py
+++ b/utils/sddm/__init__.py
@@ -193,7 +193,20 @@ def read_hdf(filename, df):
"""
with h5py.File(filename,'r') as f:
data = f[df][:]
- return pd.DataFrame(data)
+ df = pd.DataFrame(data)
+ df.attrs = dict(f.attrs)
+ return df
+
+def write_hdf(filename, df, name):
+ """
+ Similar to pd.read_hdf() but doesn't require pytables. I'm adding this
+ function since the open science grid login nodes don't have pytables.
+ """
+ with h5py.File(filename,'w') as f:
+ f.create_dataset(name,data=df.to_records())
+ if hasattr(df,'attrs'):
+ for key, value in df.attrs.items():
+ f.attrs[key] = value
# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre
@contextlib.contextmanager
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 391fa66..f81df67 100644
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -19,11 +19,11 @@ import numpy as np
from scipy.stats import poisson, dirichlet, multinomial, beta
from scipy.special import spence, gammaln
from .dc import DC_MUON, DC_JUNK, DC_CRATE_ISOTROPY, DC_QVNHIT, DC_NECK, DC_FLASHER, DC_ESUM, DC_OWL, DC_OWL_TRIGGER, DC_FTS, DC_ITC, DC_BREAKDOWN
-from . import grouper, print_warning, AV_RADIUS, PSUP_RADIUS, read_hdf
+from . import grouper, print_warning, AV_RADIUS, PSUP_RADIUS, read_hdf, write_hdf
import pandas as pd
from scipy.special import logsumexp
-from os.path import exists, join
-from os import environ
+from os.path import exists, join, split, getmtime
+from os import environ, mkdir, remove
import hashlib
EPSILON = 1e-10
@@ -482,7 +482,21 @@ def burst_cut(ev):
return ev.groupby(burst.cumsum()).filter(lambda ev: len(ev[ev.prompt_50]) <= BURST_MAX_EVENTS).reset_index(drop=True)
def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
- ev = pd.concat([read_hdf(filename, "ev").assign(filename=filename) for filename in filenames],ignore_index=True)
+ h = hashlib.sha1()
+
+ for filename in filenames:
+ head, tail = split(filename)
+ mtime = getmtime(filename)
+ h.update(tail.encode("utf-8"))
+ h.update(str(mtime).encode("utf-8"))
+
+ hash_filename = join(environ['HOME'],'.cached_hdf5','%s.hdf5' % h.hexdigest())
+
+ if exists(hash_filename) and merge_fits:
+ print("loading result from '%s'" % hash_filename)
+ return read_hdf(hash_filename,"ev")
+
+ ev = pd.concat([read_hdf(filename, "ev") for filename in filenames],ignore_index=True)
fits = pd.concat([read_hdf(filename, "fits") for filename in filenames],ignore_index=True)
rhdr = pd.concat([read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True)
if mc:
@@ -727,10 +741,21 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
np.cos(ev_single_particle.theta1)*ev_single_particle.z
ev['udotr'] /= ev_single_particle.r
+ ev = ev.reset_index()
+
# Yes, this is super hacky, but I don't want to change what's returned from
# this function.
ev.attrs = {'time_pulse_gt': time_pulse_gt, 'time_10_mhz': time_10_mhz}
+ head, tail = split(hash_filename)
+ if not exists(head):
+ mkdir(head)
+
+ try:
+ write_hdf(hash_filename,ev,"ev")
+ except KeyboardInterrupt as e:
+ remove(hash_filename)
+
return ev
# Yes, this is super hacky, but I don't want to change what's returned from