#!/usr/bin/env python # Copyright (c) 2019, Anthony Latorre # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . from __future__ import print_function, division import yaml try: from yaml import CLoader as Loader except ImportError: from yaml.loader import SafeLoader as Loader import numpy as np from scipy.stats import iqr from matplotlib.lines import Line2D # from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python class bcolors: HEADER = '\033[95m' OKBLUE = '\033[94m' OKGREEN = '\033[92m' WARNING = '\033[93m' FAIL = '\033[91m' ENDC = '\033[0m' BOLD = '\033[1m' UNDERLINE = '\033[4m' # on retina screens, the default plots are way too small # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 # Qt5 will scale everything using the dpi in ~/.Xresources import matplotlib matplotlib.use("Qt5Agg") SNOMAN_MASS = { 20: 0.511, 21: 0.511, 22: 105.658, 23: 105.658 } AV_RADIUS = 600.0 # Data cleaning bitmasks. DC_MUON = 0x1 DC_JUNK = 0x2 DC_CRATE_ISOTROPY = 0x4 DC_QVNHIT = 0x8 DC_NECK = 0x10 DC_FLASHER = 0x20 DC_ESUM = 0x40 DC_OWL = 0x80 DC_OWL_TRIGGER = 0x100 DC_FTS = 0x200 def plot_hist(df, title=None): for id, df_id in sorted(df.groupby('id')): if id == 20: plt.subplot(3,4,1) elif id == 22: plt.subplot(3,4,2) elif id == 2020: plt.subplot(3,4,5) elif id == 2022: plt.subplot(3,4,6) elif id == 2222: plt.subplot(3,4,7) elif id == 202020: plt.subplot(3,4,9) elif id == 202022: plt.subplot(3,4,10) elif id == 202222: plt.subplot(3,4,11) elif id == 222222: plt.subplot(3,4,12) plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') plt.xlabel("Energy (MeV)") plt.title(str(id)) if title: plt.suptitle(title) if len(df): plt.tight_layout() def chunks(l, n): """Yield successive n-sized chunks from l.""" for i in range(0, len(l), n): yield l[i:i + n] def print_warning(msg): print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) if __name__ == '__main__': import argparse import matplotlib.pyplot as plt import numpy as np import pandas as pd import sys parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") args = parser.parse_args() events = [] fit_results = [] for filename in args.filenames: print(filename) with open(filename) as f: data = yaml.load(f,Loader=Loader) for i, event in enumerate(data['data']): for ev in event['ev']: events.append(( ev['run'], ev['gtr'], ev['nhit'], ev['gtid'], ev['dc'], ev['ftp']['x'] if 'ftp' in ev else np.nan, ev['ftp']['y'] if 'ftp' in ev else np.nan, ev['ftp']['z'] if 'ftp' in ev else np.nan, ev['rsp']['energy'] if 'rsp' in ev and ev['rsp']['energy'] > 0 else np.nan, )) if 'fit' not in ev: continue for id, fit_result in [x for x in ev['fit'].iteritems() if isinstance(x[0],int)]: # FIXME: Should I just store the particle ids in the YAML # output as a list of particle ids instead of a single # integer? ids = map(int,chunks(str(id),2)) energy = 0.0 skip = False for i, ke in zip(ids,np.atleast_1d(fit_result['energy'])): energy += ke + SNOMAN_MASS[i] # This is a bit of a hack. It appears that many times # the fit will actually do much better by including a # very low energy electron or muon. I believe the # reason for this is that of course my likelihood # function is not perfect (for example, I don't include # the correct angular distribution for Rayleigh # scattered light), and so the fitter often wants to # add a very low energy electron or muon to fix things. # # Ideally I would fix the likelihood function, but for # now we just discard any fit results which have a very # low energy electron or muon. if len(ids) > 1 and i == 20 and ke < 20.0: skip = True if len(ids) > 1 and i == 22 and ke < 200.0: skip = True if skip: continue # Calculate the approximate Ockham factor. # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes # # Note: This is a really approximate form by assuming that # the shape of the likelihood space is equal to the average # uncertainty in the different parameters. w = len(ids)*np.log(0.1*0.001) + np.sum(np.log(fit_result['energy'])) + len(ids)*np.log(1e-4/(4*np.pi)) fit_results.append(( ev['run'], ev['gtid'], id, fit_result['posx'], fit_result['posy'], fit_result['posz'], fit_result['t0'], energy, np.atleast_1d(fit_result['theta'])[0], np.atleast_1d(fit_result['phi'])[0], fit_result['fmin'] - w, fit_result['psi']/ev['nhit'])) # create a dataframe # note: we have to first create a numpy structured array since there is no # way to pass a list of data types to the DataFrame constructor. See # https://github.com/pandas-dev/pandas/issues/4464 array = np.array(fit_results, dtype=[('run',np.int), # run number ('gtid',np.int), # gtid ('id',np.int), # particle id ('x', np.double), # x ('y',np.double), # y ('z',np.double), # z ('t0',np.double), # t0 ('ke',np.double), # kinetic energy ('theta',np.double), # direction of 1st particle ('phi',np.double), # direction of 2nd particle ('fmin',np.double), # negative log likelihood ('psi',np.double)] # goodness of fit parameter ) df = pd.DataFrame.from_records(array) array = np.array(events, dtype=[('run',np.int), # run number ('gtr',np.double), # 50 MHz clock in ns ('nhit',np.int), # number of PMTs hit ('gtid',np.int), # gtid ('dc',np.int), # data cleaning word ('ftpx',np.double), # FTP fitter X position ('ftpy',np.double), # FTP fitter Y position ('ftpz',np.double), # FTP fitter Z position ('rsp_energy',np.double)] # RSP energy ) df_ev = pd.DataFrame.from_records(array) # Make sure events are in order. We use run number and GTID here which # should be in time order as well except for bit flips in the GTID # This is important for later when we look at time differences in the 50 # MHz clock. We should perhaps do a check here based on the 10 MHz clock to # make sure things are in order df_ev = df_ev.sort_values(by=['run','gtid']) print("number of events = %i" % len(df_ev)) # First, do basic data cleaning which is done for all events. df_ev = df_ev[df_ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK) == 0] print("number of events after data cleaning = %i" % len(df_ev)) # Now, we select events tagged by the muon tag which should tag only # external muons. We keep the sample of muons since it's needed later to # identify Michel electrons and to apply the muon follower cut muons = df_ev[(df_ev.dc & DC_MUON) != 0] print("number of muons = %i" % len(muons)) # Now, select prompt events. # # We define a prompt event here as any event with an NHIT > 100 and whose # previous > 100 nhit event was more than 250 ms ago # # Should I use 10 MHz clock here since the time isn't critical and then I # don't have to worry about 50 MHz clock rollover? prompt = df_ev[df_ev.nhit >= 100] prompt_mask = np.concatenate(([True],np.diff(prompt.gtr.values) > 250e6)) # Michel electrons and neutrons can be any event which is not a prompt # event follower = pd.concat([df_ev[df_ev.nhit < 100],prompt[~prompt_mask]]) # Apply the prompt mask prompt = prompt[prompt_mask] # Try to identify Michel electrons. Currenly, the event selection is based # on Richie's thesis. Here, we do the following: # # 1. Apply more data cleaning cuts to potential Michel electrons # 2. Nhit >= 100 # 3. It must be > 800 ns and less than 20 microseconds from a prompt event # or a muon prompt_plus_muons = pd.concat([prompt,muons]) print("number of events after prompt nhit cut = %i" % len(prompt)) if prompt_plus_muons.size and follower.size: # require Michel events to pass more of the SNO data cleaning cuts michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == DC_FTS] michel = michel[michel.nhit >= 100] # accept events which had a muon more than 800 nanoseconds but less than 20 microseconds before them # The 800 nanoseconds cut comes from Richie's thesis. He also mentions # that the In Time Channel Spread Cut is very effective at cutting # electrons events caused by muons, so I should implement that michel = michel[np.any((michel.run.values == prompt_plus_muons.run.values[:,np.newaxis]) & \ (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] else: michel = pd.DataFrame(columns=follower.columns) if prompt.size and follower.size: # neutron followers have to obey stricter set of data cleaning cuts neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == DC_FTS] neutron = neutron[~np.isnan(neutron.ftpx) & ~np.isnan(neutron.rsp_energy)] r = np.sqrt(neutron.ftpx**2 + neutron.ftpy**2 + neutron.ftpz**2) neutron = neutron[r < AV_RADIUS] neutron = neutron[neutron.rsp_energy > 4.0] # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) # FIXME: need to deal with 50 MHz clock rollover neutron_mask = np.any((neutron.run.values == prompt.run.values[:,np.newaxis]) & \ (neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) df_atm = prompt[neutron_mask] prompt = prompt[~neutron_mask] else: df_atm = pd.DataFrame(columns=prompt.columns) print("number of events after neutron follower cut = %i" % len(prompt)) # Get rid of muon events in our main event sample prompt = prompt[(prompt.dc & DC_MUON) == 0] df_atm = df_atm[(df_atm.dc & DC_MUON) == 0] print("number of events after muon cut = %i" % len(prompt)) if muons.size: # FIXME: need to deal with 50 MHz clock rollover # remove events 200 microseconds after a muon prompt = prompt[~np.any((prompt.run.values == muons.run.values[:,np.newaxis]) & \ (prompt.gtr.values > muons.gtr.values[:,np.newaxis]) & \ (prompt.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)] df_atm = df_atm[~np.any((df_atm.run.values == muons.run.values[:,np.newaxis]) & \ (df_atm.gtr.values > muons.gtr.values[:,np.newaxis]) & \ (df_atm.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)] # Check to see if there are any events with missing fit information df_atm_ra = df_atm[['run','gtid']].to_records(index=False) muons_ra = muons[['run','gtid']].to_records(index=False) prompt_ra = prompt[['run','gtid']].to_records(index=False) michel_ra = michel[['run','gtid']].to_records(index=False) df_ra = df[['run','gtid']].to_records(index=False) if np.count_nonzero(~np.isin(df_atm_ra,df_ra)): print_warning("skipping %i atmospheric events because they are missing fit information!" % np.count_nonzero(~np.isin(df_atm_ra,df_ra))) if np.count_nonzero(~np.isin(muons_ra,df_ra)): print_warning("skipping %i muon events because they are missing fit information!" % np.count_nonzero(~np.isin(muons_ra,df_ra))) if np.count_nonzero(~np.isin(prompt_ra,df_ra)): print_warning("skipping %i signal events because they are missing fit information!" % np.count_nonzero(~np.isin(prompt_ra,df_ra))) if np.count_nonzero(~np.isin(michel_ra,df_ra)): print_warning("skipping %i Michel events because they are missing fit information!" % np.count_nonzero(~np.isin(michel_ra,df_ra))) # Now, we merge the event info with the fitter info. # # Note: This means that the dataframe now contains multiple rows for each # event, one for each fit hypothesis. df_atm = pd.merge(df,df_atm,how='inner',on=['run','gtid']) muons = pd.merge(df,muons,how='inner',on=['run','gtid']) michel = pd.merge(df,michel,how='inner',on=['run','gtid']) df = pd.merge(df,prompt,how='inner',on=['run','gtid']) # get rid of events which don't have a fit nan = np.isnan(df.fmin.values) if np.count_nonzero(nan): print_warning("skipping %i signal events because the negative log likelihood is nan!" % len(df[nan].groupby(['run','gtid']))) df = df[~nan] nan_atm = np.isnan(df_atm.fmin.values) if np.count_nonzero(nan_atm): print_warning("skipping %i atmospheric events because the negative log likelihood is nan!" % len(df_atm[nan_atm].groupby(['run','gtid']))) df_atm = df_atm[~nan_atm] nan_muon = np.isnan(muons.fmin.values) if np.count_nonzero(nan_muon): print_warning("skipping %i muons because the negative log likelihood is nan!" % len(muons[nan_muon].groupby(['run','gtid']))) muons = muons[~nan_muon] nan_michel = np.isnan(michel.fmin.values) if np.count_nonzero(nan_michel): print_warning("skipping %i michel electron events because the negative log likelihood is nan!" % len(michel[nan_michel].groupby(['run','gtid']))) michel = michel[~nan_michel] # get the best fit df = df.sort_values('fmin').groupby(['run','gtid']).first() df_atm = df_atm.sort_values('fmin').groupby(['run','gtid']).first() michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).first() muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).first() muons = muons[muons.id == 22] # require r < 6 meters df = df[np.sqrt(df.x.values**2 + df.y.values**2 + df.z.values**2) < AV_RADIUS] df_atm = df_atm[np.sqrt(df_atm.x.values**2 + df_atm.y.values**2 + df_atm.z.values**2) < AV_RADIUS] print("number of events after radius cut = %i" % len(df)) # Note: Need to design and apply a psi based cut here plt.figure() plot_hist(df,"Without Neutron Follower") plt.figure() plot_hist(df_atm,"With Neutron Follower") plt.figure() plot_hist(michel_best_fit,"Michel Electrons") plt.figure() plot_hist(muon_best_fit,"External Muons") # Plot the energy and angular distribution for external muons plt.figure() plt.subplot(2,1,1) plt.hist(muons.ke.values, bins=np.logspace(3,7,100), histtype='step') plt.xlabel("Energy (MeV)") plt.gca().set_xscale("log") plt.subplot(2,1,2) plt.hist(np.cos(muons.theta.values), bins=np.linspace(-1,1,100), histtype='step') plt.xlabel(r"$\cos(\theta)$") plt.suptitle("Muons") # For the Michel energy plot, we only look at the single particle electron # fit michel = michel[michel.id == 20] plt.figure() plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter") if michel.size: plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP") plt.xlabel("Energy (MeV)") plt.title("Michel Electrons") plt.legend() plt.show()