aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-06-16 01:56:22 -0500
committertlatorre <tlatorre@uchicago.edu>2020-06-16 01:56:22 -0500
commitc713bca9ea51cec82471bb04cf0eec87f9e36912 (patch)
treead1338eccb7a1273ba25a0e030f647662d991254
parent0b70890d0462b56d54f6ba76d6df142f5f992ebe (diff)
downloadsddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.tar.gz
sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.tar.bz2
sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.zip
update follower cuts to be more memory efficient by using ffill()
-rwxr-xr-xutils/plot-energy8
-rw-r--r--utils/sddm/__init__.py2
-rwxr-xr-xutils/sddm/plot_energy.py109
3 files changed, 37 insertions, 82 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index ede0613..4568bcc 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -136,16 +136,10 @@ if __name__ == '__main__':
# 2. Nhit >= 100
# 3. It must be > 800 ns and less than 20 microseconds from a prompt event
# or a muon
- michel = ev.groupby('run',group_keys=False).apply(michel_cut)
+ michel = ev[ev.michel]
print("number of michel events = %i" % len(michel))
- # Tag atmospheric events.
- #
- # Note: We don't cut atmospheric events or muons yet because we still need
- # all the events in order to apply the muon follower cut.
- ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
-
print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))
# remove events 200 microseconds after a muon
diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py
index d3a1be4..4df4da8 100644
--- a/utils/sddm/__init__.py
+++ b/utils/sddm/__init__.py
@@ -182,4 +182,4 @@ def read_hdf(filename, df):
"""
with h5py.File(filename) as f:
data = f[df][:]
- return pd.DataFrame.from_records(data)
+ return pd.DataFrame(data)
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 8027193..1971c8d 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -82,75 +82,34 @@ def breakdown_follower_cut(ev):
"""
Cuts all events within 1 second of breakdown events.
"""
- breakdowns = ev[ev.dc & DC_BREAKDOWN != 0]
- return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)]
+ time_since_last_breakdown = ev.gtr - ev.gtr.where(ev.dc & DC_BREAKDOWN != 0).ffill()
+ return ev[~((time_since_last_breakdown > 0) & (time_since_last_breakdown < 1e9))]
def flasher_follower_cut(ev):
"""
Cuts all events within 200 microseconds of flasher events.
"""
- flashers = ev[ev.dc & DC_FLASHER != 0]
- return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)]
+ time_since_last_flasher = ev.gtr - ev.gtr.where(ev.dc & DC_FLASHER != 0).ffill()
+ return ev[~((time_since_last_flasher > 0) & (time_since_last_flasher < 200e3))]
def muon_follower_cut(ev):
"""
Cuts all events 200 microseconds after a muon.
"""
- muons = ev[ev.dc & DC_MUON != 0]
- return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)]
-
-def michel_cut(ev):
- """
- Looks for Michel electrons after muons.
- """
- prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]
-
- # Michel electrons and neutrons can be any event which is not a prompt
- # event
- follower = ev[~ev.prompt]
-
- # require Michel events to pass more of the SNO data cleaning cuts
- michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-
- michel = michel[michel.nhit >= 100]
-
- # Accept events which had a muon more than 800 nanoseconds but less than 20
- # microseconds before them. The 800 nanoseconds cut comes from Richie's
- # thesis. He also mentions that the In Time Channel Spread Cut is very
- # effective at cutting electron events caused by muons, so I should
- # implement that.
- #
- # Note: We currently don't look across run boundaries. This should be a
- # *very* small effect, and the logic to do so would be very complicated
- # since I would have to deal with 50 MHz clock rollovers, etc.
- if prompt_plus_muons.size and michel.size:
- mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
- (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
- michel = michel.iloc[np.any(mask,axis=0)]
- michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
- index=michel.index.values,
- dtype=np.int32)
- return michel
- else:
- # Return an empty slice since we need it to have the same datatype as
- # the other dataframes
- michel = ev[:0]
- michel['muon_gtid'] = -1
- return michel
+ time_since_last_muon = ev.gtr - ev.gtr.where(ev.dc & DC_MUON != 0).ffill()
+ return ev[~((time_since_last_muon > 0) & (time_since_last_muon < 200e3))]
def tag_michels(ev):
"""
- Tags michel events with the michel column and also adds a column linking to
- the muon GTID. The muon also gets labelled with the stopping_muon column.
+ Looks for Michel electrons after muons.
"""
- michels = michel_cut(ev)
- ev['michel'] = np.zeros(len(ev),dtype=np.bool)
- ev.loc[michels.index,'michel'] = True
- ev['muon_gtid'] = -np.ones(len(ev),dtype=np.int)
- ev.loc[michels.index,'muon_gtid'] = michels['muon_gtid']
+ time_since_last_prompt_plus_muon = ev.gtr - ev.gtr.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
+ ev['muon_gtid'] = ev.gtid.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
+ ev['michel'] = ~ev.prompt
+ ev['michel'] &= ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0
+ ev['michel'] &= ev.nhit >= 100
+ ev['michel'] &= (time_since_last_prompt_plus_muon > 800) & (time_since_last_prompt_plus_muon < 200e3)
+ ev.loc[~ev.michel,'muon_gtid'] = -1
ev['stopping_muon'] = np.zeros(len(ev),dtype=np.bool)
ev.loc[ev.gtid.isin(ev.muon_gtid[ev.muon_gtid > 0].values),'stopping_muon'] = 1
return ev
@@ -159,25 +118,13 @@ def atmospheric_events(ev):
"""
Tags atmospheric events which have a neutron follower.
"""
- prompt = ev[ev.prompt]
-
- # Michel electrons and neutrons can be any event which is not a prompt
- # event
- follower = ev[~ev.prompt]
-
- ev['atm'] = np.zeros(len(ev),dtype=np.bool)
-
- if prompt.size and follower.size:
- # neutron followers have to obey stricter set of data cleaning cuts
- neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
- neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
- # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9?
- neutron = neutron[neutron.ftp_r < AV_RADIUS]
- neutron = neutron[neutron.rsp_energy > 4.0]
-
- # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
- ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
- (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
+ ev['neutron'] = ~ev.prompt
+ ev['neutron'] &= ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0
+ ev['neutron'] &= ~np.isnan(ev.ftp_x) & ~np.isnan(ev.rsp_energy)
+ ev['neutron'] &= ev.ftp_r < AV_RADIUS
+ ev['neutron'] &= ev.rsp_energy > 4.0
+ time_till_next_neutron = ev.gtr.where(ev.neutron).bfill() - ev.gtr
+ ev['atm'] = ev.prompt & (time_till_next_neutron > 20e3) & (time_till_next_neutron < 250e6)
return ev
@@ -595,8 +542,22 @@ def get_events(filenames, merge_fits=False):
ev['signal'] = ~(ev.noise | ev.neck | ev.flasher | ev.breakdown | ev.muon)
ev['instrumental'] = ~ev.signal
+ # Try to identify Michel electrons. Currently, the event selection is based
+ # on Richie's thesis. Here, we do the following:
+ #
+ # 1. Apply more data cleaning cuts to potential Michel electrons
+ # 2. Nhit >= 100
+ # 3. It must be > 800 ns and less than 20 microseconds from a prompt event
+ # or a muon
ev = ev.groupby('run',group_keys=False).apply(tag_michels)
+ # Tag atmospheric events.
+ ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
+
+ # Cut on nhit_cal >= 100 here to reduce memory usage when merging fits. We
+ # only need the lower nhit events for neutrons.
+ ev = ev[ev.nhit_cal >= 100]
+
if merge_fits:
ev = pd.merge(ev,fits,how='left',on=['run','gtid'])
# Reset n, id1, id2, and id3 columns to integers