#!/usr/bin/env python # Copyright (c) 2019, Anthony Latorre # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . from __future__ import print_function, division import yaml try: from yaml import CLoader as Loader except ImportError: from yaml.loader import SafeLoader as Loader import numpy as np from scipy.stats import iqr from matplotlib.lines import Line2D # from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python class bcolors: HEADER = '\033[95m' OKBLUE = '\033[94m' OKGREEN = '\033[92m' WARNING = '\033[93m' FAIL = '\033[91m' ENDC = '\033[0m' BOLD = '\033[1m' UNDERLINE = '\033[4m' # on retina screens, the default plots are way too small # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 # Qt5 will scale everything using the dpi in ~/.Xresources import matplotlib matplotlib.use("Qt5Agg") SNOMAN_MASS = { 20: 0.511, 21: 0.511, 22: 105.658, 23: 105.658 } AV_RADIUS = 600.0 # Data cleaning bitmasks. DC_MUON = 0x1 DC_JUNK = 0x2 DC_CRATE_ISOTROPY = 0x4 DC_QVNHIT = 0x8 DC_NECK = 0x10 DC_FLASHER = 0x20 DC_ESUM = 0x40 DC_OWL = 0x80 DC_OWL_TRIGGER = 0x100 DC_FTS = 0x200 DC_ITC = 0x400 def plot_hist(df, title=None): for id, df_id in sorted(df.groupby('id')): if id == 20: plt.subplot(3,4,1) elif id == 22: plt.subplot(3,4,2) elif id == 2020: plt.subplot(3,4,5) elif id == 2022: plt.subplot(3,4,6) elif id == 2222: plt.subplot(3,4,7) elif id == 202020: plt.subplot(3,4,9) elif id == 202022: plt.subplot(3,4,10) elif id == 202222: plt.subplot(3,4,11) elif id == 222222: plt.subplot(3,4,12) plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') plt.xlabel("Energy (MeV)") plt.title(str(id)) if title: plt.suptitle(title) if len(df): plt.tight_layout() def chunks(l, n): """Yield successive n-sized chunks from l.""" for i in range(0, len(l), n): yield l[i:i + n] def print_warning(msg): print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) def unwrap(p, delta, axis=-1): """ A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. It unwraps discontinuities bigger than delta/2 by delta. Example: >>> a = np.arange(10) % 5 >>> a array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) >>> unwrap(a,5) array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. """ p = np.asarray(p) nd = p.ndim dd = np.diff(p, axis=axis) slice1 = [slice(None, None)]*nd # full slices slice1[axis] = slice(1, None) slice1 = tuple(slice1) ddmod = np.mod(dd + delta/2, delta) - delta/2 np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) ph_correct = ddmod - dd np.copyto(ph_correct, 0, where=abs(dd) < delta/2) up = np.array(p, copy=True, dtype='d') up[slice1] = p[slice1] + ph_correct.cumsum(axis) return up def unwrap_50_mhz_clock(gtr): """ Unwrap an array with 50 MHz clock times. These times should all be in nanoseconds and come from the KEV_GTR variable in the EV bank. Note: We assume here that the events are already ordered contiguously by GTID, so you shouldn't pass an array with multiple runs! """ return unwrap(gtr,0x7ffffffffff*20.0) if __name__ == '__main__': import argparse import matplotlib.pyplot as plt import numpy as np import pandas as pd import sys import h5py parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") args = parser.parse_args() ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True) fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True) # First, remove junk events since orphans won't have a 50 MHz clock and so # could screw up the 50 MHz clock unwrapping ev = ev[ev.dc & DC_JUNK == 0] # make sure events are sorted by GTID ev = ev.sort_values(by=['run','gtid']) # unwrap the 50 MHz clock within each run ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock) # This is a bit of a hack. It appears that many times the fit will # actually do much better by including a very low energy electron or # muon. I believe the reason for this is that of course my likelihood # function is not perfect (for example, I don't include the correct # angular distribution for Rayleigh scattered light), and so the fitter # often wants to add a very low energy electron or muon to fix things. # # Ideally I would fix the likelihood function, but for now we just # discard any fit results which have a very low energy electron or # muon. # # FIXME: Test this since query() is new to pandas fits = fits.query('not (n > 1 and ((id1 == 20 and energy1 < 20) or (id2 == 20 and energy2 < 20) or (id3 == 20 and energy3 < 20)))') fits = fits.query('not (n > 1 and ((id2 == 22 and energy1 < 200) or (id2 == 22 and energy2 < 200) or (id3 == 22 and energy3 < 200)))') # Calculate the approximate Ockham factor. # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes # # Note: This is a really approximate form by assuming that the shape of # the likelihood space is equal to the average uncertainty in the # different parameters. fits['w'] = fits['n']*np.log(0.1*0.001) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi)) # Note: we index on the left hand site with loc to avoid a copy error # # See https://www.dataquest.io/blog/settingwithcopywarning/ fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2']) fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3']) fits['fmin'] = fits['fmin'] - fits['w'] fits['psi'] /= fits.merge(ev,on=['run','gtid'])['nhit'] fits['ke'] = fits['energy1'] fits['id'] = fits['id1'] fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2'] fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3'] fits['theta'] = fits['theta1'] # Make sure events are in order. We use run number and GTID here which # should be in time order as well except for bit flips in the GTID # This is important for later when we look at time differences in the 50 # MHz clock. We should perhaps do a check here based on the 10 MHz clock to # make sure things are in order ev = ev.sort_values(by=['run','gtid']) print("number of events = %i" % len(ev)) # First, do basic data cleaning which is done for all events. ev = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC) == 0] print("number of events after data cleaning = %i" % len(ev)) # Now, we select events tagged by the muon tag which should tag only # external muons. We keep the sample of muons since it's needed later to # identify Michel electrons and to apply the muon follower cut muons = ev[(ev.dc & DC_MUON) != 0] print("number of muons = %i" % len(muons)) # Now, select prompt events. # # We define a prompt event here as any event with an NHIT > 100 and whose # previous > 100 nhit event was more than 250 ms ago prompt = ev[ev.nhit >= 100] prompt_mask = np.concatenate(([True],np.diff(prompt.gtr.values) > 250e6)) # Michel electrons and neutrons can be any event which is not a prompt # event follower = pd.concat([ev[ev.nhit < 100],prompt[~prompt_mask]]) # Apply the prompt mask prompt = prompt[prompt_mask] # Try to identify Michel electrons. Currenly, the event selection is based # on Richie's thesis. Here, we do the following: # # 1. Apply more data cleaning cuts to potential Michel electrons # 2. Nhit >= 100 # 3. It must be > 800 ns and less than 20 microseconds from a prompt event # or a muon prompt_plus_muons = pd.concat([prompt,muons]) print("number of events after prompt nhit cut = %i" % len(prompt)) if prompt_plus_muons.size and follower.size: # require Michel events to pass more of the SNO data cleaning cuts michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] michel = michel[michel.nhit >= 100] # Accept events which had a muon more than 800 nanoseconds but less # than 20 microseconds before them. The 800 nanoseconds cut comes from # Richie's thesis. He also mentions that the In Time Channel Spread Cut # is very effective at cutting electrons events caused by muons, so I # should implement that. # # Note: We currently don't look across run boundaries. This should be a # *very* small effect, and the logic to do so would be very complicated # since I would have to deal with 50 MHz clock rollovers, etc. # # Do a simple python for loop here over the runs since we create # temporary arrays with shape (michel.size,prompt_plus_muons.size) # which could be too big for the full dataset. # # There might be some clever way to do this without the loop in Pandas, # but I don't know how. michel_sum = None for run, michel_run in michel.groupby(['run']): prompt_plus_muons_run = prompt_plus_muons[prompt_plus_muons['run'] == run] michel_run = michel_run[np.any((michel_run.gtr.values > prompt_plus_muons_run.gtr.values[:,np.newaxis] + 800) & \ (michel_run.gtr.values < prompt_plus_muons_run.gtr.values[:,np.newaxis] + 20e3),axis=0)] if michel_sum is None: michel_sum = michel_run else: michel_sum = michel_sum.append(michel_run) if michel_sum is not None: michel = michel_sum else: michel = pd.DataFrame(columns=follower.columns) else: michel = pd.DataFrame(columns=follower.columns) if prompt.size and follower.size: # neutron followers have to obey stricter set of data cleaning cuts neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2) neutron = neutron[r < AV_RADIUS] neutron = neutron[neutron.rsp_energy > 4.0] # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) prompt_sum = None atm_sum = None for run, prompt_run in prompt.groupby(['run']): neutron_run = neutron[neutron['run'] == run] neutron_mask = np.any((neutron_run.gtr.values > prompt_run.gtr.values[:,np.newaxis] + 20e3) & \ (neutron_run.gtr.values < prompt_run.gtr.values[:,np.newaxis] + 250e6),axis=1) if prompt_sum is None: prompt_sum = prompt_run[~neutron_mask] else: prompt_sum = prompt_sum.append(prompt_run[~neutron_mask]) if atm_sum is None: atm_sum = prompt_run[neutron_mask] else: atm_sum = atm_sum.append(prompt_run[neutron_mask]) atm = atm_sum prompt = prompt_sum else: atm = pd.DataFrame(columns=prompt.columns) print("number of events after neutron follower cut = %i" % len(prompt)) # Get rid of muon events in our main event sample prompt = prompt[(prompt.dc & DC_MUON) == 0] atm = atm[(atm.dc & DC_MUON) == 0] print("number of events after muon cut = %i" % len(prompt)) if muons.size: # remove events 200 microseconds after a muon prompt = prompt[~np.any((prompt.run.values == muons.run.values[:,np.newaxis]) & \ (prompt.gtr.values > muons.gtr.values[:,np.newaxis]) & \ (prompt.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)] atm = atm[~np.any((atm.run.values == muons.run.values[:,np.newaxis]) & \ (atm.gtr.values > muons.gtr.values[:,np.newaxis]) & \ (atm.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)] # Check to see if there are any events with missing fit information atm_ra = atm[['run','gtid']].to_records(index=False) muons_ra = muons[['run','gtid']].to_records(index=False) prompt_ra = prompt[['run','gtid']].to_records(index=False) michel_ra = michel[['run','gtid']].to_records(index=False) fits_ra = fits[['run','gtid']].to_records(index=False) if len(atm_ra) and np.count_nonzero(~np.isin(atm_ra,fits_ra)): print_warning("skipping %i atmospheric events because they are missing fit information!" % np.count_nonzero(~np.isin(atm_ra,fits_ra))) if len(muons_ra) and np.count_nonzero(~np.isin(muons_ra,fits_ra)): print_warning("skipping %i muon events because they are missing fit information!" % np.count_nonzero(~np.isin(muons_ra,fits_ra))) if len(prompt_ra) and np.count_nonzero(~np.isin(prompt_ra,fits_ra)): print_warning("skipping %i signal events because they are missing fit information!" % np.count_nonzero(~np.isin(prompt_ra,fits_ra))) if len(michel_ra) and np.count_nonzero(~np.isin(michel_ra,fits_ra)): print_warning("skipping %i Michel events because they are missing fit information!" % np.count_nonzero(~np.isin(michel_ra,fits_ra))) # Now, we merge the event info with the fitter info. # # Note: This means that the dataframe now contains multiple rows for each # event, one for each fit hypothesis. atm = pd.merge(fits,atm,how='inner',on=['run','gtid']) muons = pd.merge(fits,muons,how='inner',on=['run','gtid']) michel = pd.merge(fits,michel,how='inner',on=['run','gtid']) prompt = pd.merge(fits,prompt,how='inner',on=['run','gtid']) # get rid of events which don't have a fit nan = np.isnan(prompt.fmin.values) if np.count_nonzero(nan): print_warning("skipping %i signal events because the negative log likelihood is nan!" % len(prompt[nan].groupby(['run','gtid']))) prompt = prompt[~nan] nan_atm = np.isnan(atm.fmin.values) if np.count_nonzero(nan_atm): print_warning("skipping %i atmospheric events because the negative log likelihood is nan!" % len(atm[nan_atm].groupby(['run','gtid']))) atm = atm[~nan_atm] nan_muon = np.isnan(muons.fmin.values) if np.count_nonzero(nan_muon): print_warning("skipping %i muons because the negative log likelihood is nan!" % len(muons[nan_muon].groupby(['run','gtid']))) muons = muons[~nan_muon] nan_michel = np.isnan(michel.fmin.values) if np.count_nonzero(nan_michel): print_warning("skipping %i michel electron events because the negative log likelihood is nan!" % len(michel[nan_michel].groupby(['run','gtid']))) michel = michel[~nan_michel] # get the best fit prompt = prompt.sort_values('fmin').groupby(['run','gtid']).nth(0) atm = atm.sort_values('fmin').groupby(['run','gtid']).nth(0) michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).nth(0) muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).nth(0) muons = muons[muons.id == 22] # require r < 6 meters prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS] atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS] print("number of events after radius cut = %i" % len(prompt)) # Note: Need to design and apply a psi based cut here plt.figure() plot_hist(prompt,"Without Neutron Follower") plt.figure() plot_hist(atm,"With Neutron Follower") plt.figure() plot_hist(michel_best_fit,"Michel Electrons") plt.figure() plot_hist(muon_best_fit,"External Muons") # Plot the energy and angular distribution for external muons plt.figure() plt.subplot(2,1,1) plt.hist(muons.ke.values, bins=np.logspace(3,7,100), histtype='step') plt.xlabel("Energy (MeV)") plt.gca().set_xscale("log") plt.subplot(2,1,2) plt.hist(np.cos(muons.theta.values), bins=np.linspace(-1,1,100), histtype='step') plt.xlabel(r"$\cos(\theta)$") plt.suptitle("Muons") # For the Michel energy plot, we only look at the single particle electron # fit michel = michel[michel.id == 20] plt.figure() plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter") if michel.size: plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP") plt.xlabel("Energy (MeV)") plt.title("Michel Electrons") plt.legend() plt.show()