diff options
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/chi2 | 8 | ||||
-rwxr-xr-x | utils/dm-search | 8 | ||||
-rwxr-xr-x | utils/plot-michels | 3 | ||||
-rwxr-xr-x | utils/plot-muons | 5 | ||||
-rw-r--r-- | utils/sddm/__init__.py | 15 | ||||
-rw-r--r-- | utils/sddm/plot_energy.py | 33 |
6 files changed, 48 insertions, 24 deletions
@@ -552,7 +552,7 @@ if __name__ == '__main__': # Add the "flux_weight" column to the ev_mc data since I stupidly simulated # the muon neutrino flux for the tau neutrino flux in GENIE. Doh! mcpl = load_mcpl_files(args.mcpl) - ev_mc = renormalize_data(ev_mc.reset_index(),mcpl) + ev_mc = renormalize_data(ev_mc,mcpl) # Merge weights with MCPL dataframe to get the unique id column in the # weights dataframe since that is what we use to merge with the Monte @@ -591,11 +591,7 @@ if __name__ == '__main__': muon_mc = correct_energy_bias(muon_mc) # Set all prompt events in the MC to be muons - muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True - - ev = ev.reset_index() - ev_mc = ev_mc.reset_index() - muon_mc = muon_mc.reset_index() + muon_mc.loc[muon_mc.prompt ,'muon'] = True # 00-orphan cut ev = ev[(ev.gtid & 0xff) != 0] diff --git a/utils/dm-search b/utils/dm-search index 8e2473c..cd91eab 100755 --- a/utils/dm-search +++ b/utils/dm-search @@ -616,7 +616,7 @@ if __name__ == '__main__': # Add the "flux_weight" column to the ev_mc data since I stupidly simulated # the muon neutrino flux for the tau neutrino flux in GENIE. Doh! mcpl = load_mcpl_files(args.mcpl) - ev_mc = renormalize_data(ev_mc.reset_index(),mcpl) + ev_mc = renormalize_data(ev_mc,mcpl) # Merge weights with MCPL dataframe to get the unique id column in the # weights dataframe since that is what we use to merge with the Monte @@ -653,11 +653,7 @@ if __name__ == '__main__': muon_mc = correct_energy_bias(muon_mc) # Set all prompt events in the MC to be muons - muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True - - ev = ev.reset_index() - ev_mc = ev_mc.reset_index() - muon_mc = muon_mc.reset_index() + muon_mc.loc[muon_mc.prompt,'muon'] = True # 00-orphan cut ev = ev[(ev.gtid & 0xff) != 0] diff --git a/utils/plot-michels b/utils/plot-michels index dbd6fd5..e0e4dd7 100755 --- a/utils/plot-michels +++ b/utils/plot-michels @@ -131,9 +131,6 @@ if __name__ == '__main__': ev = ev.reset_index() ev_mc = ev_mc.reset_index() - # Set all prompt events in the MC to be muons - ev_mc.loc[ev_mc.prompt & ev_mc.filename.str.contains("cosmic"),'muon'] = True - # First, do basic data cleaning which is done for all events. ev = ev[ev.signal | ev.muon] ev_mc = ev_mc[ev_mc.signal | ev_mc.muon] diff --git a/utils/plot-muons b/utils/plot-muons index 2616ad2..a37f223 100755 --- a/utils/plot-muons +++ b/utils/plot-muons @@ -102,11 +102,8 @@ if __name__ == '__main__': ev = ev[~np.isnan(ev.fmin)] ev_mc = ev_mc[~np.isnan(ev_mc.fmin)] - ev = ev.reset_index() - ev_mc = ev_mc.reset_index() - # Set all prompt events in the MC to be muons - ev_mc.loc[ev_mc.prompt & ev_mc.filename.str.contains("cosmic"),'muon'] = True + ev_mc.loc[ev_mc.prompt,'muon'] = True # First, do basic data cleaning which is done for all events. ev = ev[ev.signal | ev.muon] diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py index c6f2ddc..fb0fd69 100644 --- a/utils/sddm/__init__.py +++ b/utils/sddm/__init__.py @@ -193,7 +193,20 @@ def read_hdf(filename, df): """ with h5py.File(filename,'r') as f: data = f[df][:] - return pd.DataFrame(data) + df = pd.DataFrame(data) + df.attrs = dict(f.attrs) + return df + +def write_hdf(filename, df, name): + """ + Similar to pd.read_hdf() but doesn't require pytables. I'm adding this + function since the open science grid login nodes don't have pytables. + """ + with h5py.File(filename,'w') as f: + f.create_dataset(name,data=df.to_records()) + if hasattr(df,'attrs'): + for key, value in df.attrs.items(): + f.attrs[key] = value # from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre @contextlib.contextmanager diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py index 391fa66..f81df67 100644 --- a/utils/sddm/plot_energy.py +++ b/utils/sddm/plot_energy.py @@ -19,11 +19,11 @@ import numpy as np from scipy.stats import poisson, dirichlet, multinomial, beta from scipy.special import spence, gammaln from .dc import DC_MUON, DC_JUNK, DC_CRATE_ISOTROPY, DC_QVNHIT, DC_NECK, DC_FLASHER, DC_ESUM, DC_OWL, DC_OWL_TRIGGER, DC_FTS, DC_ITC, DC_BREAKDOWN -from . import grouper, print_warning, AV_RADIUS, PSUP_RADIUS, read_hdf +from . import grouper, print_warning, AV_RADIUS, PSUP_RADIUS, read_hdf, write_hdf import pandas as pd from scipy.special import logsumexp -from os.path import exists, join -from os import environ +from os.path import exists, join, split, getmtime +from os import environ, mkdir, remove import hashlib EPSILON = 1e-10 @@ -482,7 +482,21 @@ def burst_cut(ev): return ev.groupby(burst.cumsum()).filter(lambda ev: len(ev[ev.prompt_50]) <= BURST_MAX_EVENTS).reset_index(drop=True) def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False): - ev = pd.concat([read_hdf(filename, "ev").assign(filename=filename) for filename in filenames],ignore_index=True) + h = hashlib.sha1() + + for filename in filenames: + head, tail = split(filename) + mtime = getmtime(filename) + h.update(tail.encode("utf-8")) + h.update(str(mtime).encode("utf-8")) + + hash_filename = join(environ['HOME'],'.cached_hdf5','%s.hdf5' % h.hexdigest()) + + if exists(hash_filename) and merge_fits: + print("loading result from '%s'" % hash_filename) + return read_hdf(hash_filename,"ev") + + ev = pd.concat([read_hdf(filename, "ev") for filename in filenames],ignore_index=True) fits = pd.concat([read_hdf(filename, "fits") for filename in filenames],ignore_index=True) rhdr = pd.concat([read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True) if mc: @@ -727,10 +741,21 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False): np.cos(ev_single_particle.theta1)*ev_single_particle.z ev['udotr'] /= ev_single_particle.r + ev = ev.reset_index() + # Yes, this is super hacky, but I don't want to change what's returned from # this function. ev.attrs = {'time_pulse_gt': time_pulse_gt, 'time_10_mhz': time_10_mhz} + head, tail = split(hash_filename) + if not exists(head): + mkdir(head) + + try: + write_hdf(hash_filename,ev,"ev") + except KeyboardInterrupt as e: + remove(hash_filename) + return ev # Yes, this is super hacky, but I don't want to change what's returned from |