add code to reweight the tau neutrino events

This commit updates the code to reweight the MC data from tau neutrinos since I stupidly simulated the muon neutrino flux instead of the tau neutrino flux.
author: tlatorre <tlatorre@uchicago.edu> 2020-12-15 14:09:26 -0600
committer: tlatorre <tlatorre@uchicago.edu> 2020-12-15 14:09:26 -0600
commit: be3b6e19510ddee930df1d9435368d66c18da551 (patch)
tree: 1248fb42e0f6ae40626b4f2dc228481a05a75f7b /utils
parent: 5e63a701a8b144093528c063856d513be92a37ed (diff)
download: sddm-be3b6e19510ddee930df1d9435368d66c18da551.tar.gz
sddm-be3b6e19510ddee930df1d9435368d66c18da551.tar.bz2
sddm-be3b6e19510ddee930df1d9435368d66c18da551.zip
6 files changed, 244 insertions, 32 deletions
diff --git a/utils/chi2 b/utils/chi2
index 117d0a0..e30a058 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -192,6 +192,9 @@ def get_mc_hists_fast(df_dict,x,bins,scale=1.0,reweight=False):
         else:
             cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)
 
+        if 'flux_weight' in df.columns:
+            cdf *= df.flux_weight.values
+
         mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1)
         mc_hists[id] *= scale
     return mc_hists
@@ -474,6 +477,7 @@ if __name__ == '__main__':
     from sddm.plot import *
     from sddm import setup_matplotlib
     import nlopt
+    from sddm.renormalize import *
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
@@ -490,6 +494,7 @@ if __name__ == '__main__':
     parser.add_argument("--walkers", type=int, default=100, help="number of walkers")
     parser.add_argument("--thin", type=int, default=10, help="number of steps to thin")
     parser.add_argument("--run-list", default=None, help="run list")
+    parser.add_argument("--mcpl", nargs='+', required=True, help="GENIE MCPL files")
     args = parser.parse_args()
 
     setup_matplotlib(args.save)
@@ -520,6 +525,11 @@ if __name__ == '__main__':
     muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True)
     weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)
 
+    # Add the "flux_weight" column to the ev_mc data since I stupidly simulated
+    # the muon neutrino flux for the tau neutrino flux in GENIE. Doh!
+    mcpl = load_mcpl_files(args.mcpl)
+    ev_mc = renormalize_data(ev_mc.reset_index(),mcpl)
+
     # There are a handful of weights which turn out to be slightly negative for
     # some reason. For example:
     #
diff --git a/utils/dm-search b/utils/dm-search
index 214ad59..262e664 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -209,6 +209,9 @@ def get_mc_hists_fast(df_dict,x,bins,scale=1.0,reweight=False):
         else:
             cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)
 
+        if 'flux_weight' in df.columns:
+            cdf *= df.flux_weight.values
+
         mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1)
         mc_hists[id] *= scale
     return mc_hists
@@ -532,6 +535,7 @@ if __name__ == '__main__':
     from sddm.plot import *
     from sddm import setup_matplotlib
     import nlopt
+    from sddm.renormalize import *
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
@@ -547,6 +551,7 @@ if __name__ == '__main__':
     parser.add_argument("--thin", type=int, default=10, help="number of steps to thin")
     parser.add_argument("--test", type=int, default=0, help="run tests to check discovery threshold")
     parser.add_argument("--run-list", default=None, help="run list")
+    parser.add_argument("--mcpl", nargs='+', required=True, help="GENIE MCPL files")
     args = parser.parse_args()
 
     setup_matplotlib(args.save)
@@ -589,6 +594,11 @@ if __name__ == '__main__':
     muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True)
     weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)
 
+    # Add the "flux_weight" column to the ev_mc data since I stupidly simulated
+    # the muon neutrino flux for the tau neutrino flux in GENIE. Doh!
+    mcpl = load_mcpl_files(args.mcpl)
+    ev_mc = renormalize_data(ev_mc.reset_index(),mcpl)
+
     # There are a handful of weights which turn out to be slightly negative for
     # some reason. For example:
     #
diff --git a/utils/print-event-rate b/utils/print-event-rate
index 691e806..6350ea8 100755
--- a/utils/print-event-rate
+++ b/utils/print-event-rate
@@ -28,27 +28,7 @@ import numpy as np
 import pandas as pd
 from sddm import read_hdf, AV_RADIUS, PSUP_RADIUS
 import gzip
-
-def read_mcpl(filename):
-    data = np.genfromtxt(filename,skip_header=1)
-    rv = {}
-    n = 1
-    i = 1
-    with gzip.open(filename,"r") as f:
-        lines = f.readlines()
-        nevents = int(lines[0].split()[0])
-        while True:
-            nparticles = int(data[i-1,0])
-            rv[n] = np.genfromtxt(lines[i:i+nparticles])
-            n += 1
-            i += nparticles
-
-            if i - 1 >= data.shape[0]:
-                break
-
-    assert nevents == len(rv)
-
-    return rv
+from sddm.renormalize import *
 
 def get_mcgn(filename):
     ev = read_hdf(filename, "ev")
@@ -76,28 +56,28 @@ if __name__ == '__main__':
     for filename in args.filenames:
         head, tail = split(filename)
         try:
-            mcpl = read_mcpl(filename)
+            mcpl = read_mcpl_df(filename)
         except Exception as e:
             print_warning("failed to read mcpl file '%s'" % filename)
+            continue
         run = int(tail.split('_')[3])
-        for ev in mcpl.values():
-            ev = np.atleast_2d(ev)
-            nu = ev[0,14]
-            r = np.linalg.norm(ev[0,16:19])
+        for _, row in mcpl.iterrows():
+            nu = row['nu_id']
+            r = row['nu_r']
             if r < AV_RADIUS:
-                if nu in ev[:,1]:
+                if not row['cc']:
                     # nc event
-                    nc_av[nu] += 1
+                    nc_av[nu] += row['flux_weight']
                 else:
                     # cc event
-                    cc_av[nu] += 1
+                    cc_av[nu] += row['flux_weight']
             if r < PSUP_RADIUS:
-                if nu in ev[:,1]:
+                if not row['cc']:
                     # nc event
-                    nc_psup[nu] += 1
+                    nc_psup[nu] += row['flux_weight']
                 else:
                     # cc event
-                    cc_psup[nu] += 1
+                    cc_psup[nu] += row['flux_weight']
         for i in range(run_info.shape[0]):
             if run_info[i][0] == run:
                 livetime += run_info[i][3]
diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py
index 3b3ace7..c6f2ddc 100644
--- a/utils/sddm/__init__.py
+++ b/utils/sddm/__init__.py
@@ -12,6 +12,12 @@ import contextlib
 
 IDP_E_MINUS  =    20
 IDP_MU_MINUS =    22
+IDP_NU_E         = 30
+IDP_NU_E_BAR     = 31
+IDP_NU_MU        = 32
+IDP_NU_MU_BAR    = 33
+IDP_NU_TAU       = 34
+IDP_NU_TAU_BAR   = 35
 
 SNOMAN_MASS = {
     20: 0.511,
diff --git a/utils/sddm/flux_data.py b/utils/sddm/flux_data.py
new file mode 100644
index 0000000..47c40af
--- /dev/null
+++ b/utils/sddm/flux_data.py
@@ -0,0 +1,78 @@
+"""
+File which has the oscillated atmospheric neutrino fluxes. The columns are:
+
+    1. Energy (MeV)
+    2. Electron Neutrino Flux (1/m^2/s/MeV)
+    3. AntiElectron Neutrino Flux (1/m^2/s/MeV)
+    4. Muon Neutrino Flux (1/m^2/s/MeV)
+    5. AntiMuon Neutrino Flux (1/m^2/s/MeV)
+    6. Tau Neutrino Flux (1/m^2/s/MeV)
+    7. AntiTau Neutrino Flux (1/m^2/s/MeV)
+
+This is used to calculate a flux weight for the Monte Carlo data since I
+stupidly simulated the tau and antitau neutrino flux in GENIE with the muon and
+antimuon neutrino flux respectively.
+"""
+
+OSCILLATED_FLUX_DATA = u"""
+      10.6      98.04      88.35      95.36       85.9      91.51      82.26
+      11.9      107.4      105.9      104.8      103.5        100      98.32
+      13.3      120.1      109.9      117.8        108      111.8      101.9
+        15      129.5      118.9      126.8      115.8      120.9      111.2
+      16.8        138        133      134.8      130.4      128.8        124
+      18.8      152.7      141.1      150.7      139.3      141.2      130.7
+      21.1      160.3      151.4      157.8      149.5      149.5      141.2
+      23.7        173      161.2      169.6      157.8      163.2        152
+      26.6      179.1      166.4      174.4        162      171.1      158.3
+      29.9      187.1      173.6        182      167.9      179.4      166.6
+      33.5      185.5      173.3      178.8      166.5      180.7      168.1
+      37.6      182.3      173.3      173.1      163.5      181.1      171.2
+      42.2      172.4      162.2      160.9      150.7        175      162.5
+      47.3      151.1      141.1      140.2        129        156      143.7
+      53.1      118.1      110.7      108.4      99.93      123.7      113.7
+      59.6      107.7      100.3      98.65       89.7      113.3      103.8
+      66.8      99.08      93.02      90.92      83.31      104.6      95.96
+        75      90.26      84.61      83.84      76.23      94.94      86.65
+      84.1      82.02      75.87       76.4      69.57      85.84      76.14
+      94.4      74.56      69.04      71.66      63.99      76.83         69
+     105.9      69.56      63.93       68.6      62.21      70.59      64.47
+     118.9      68.59      64.46      70.19      67.94      69.28      67.52
+     133.4       57.7      53.02      59.52      57.73      55.82      54.65
+     149.6      48.24      45.48      50.51      49.34      45.31      44.34
+     167.9      40.15      37.25      42.83       41.6       36.8      35.89
+     188.4      33.09      30.76      36.69      35.72      29.34      28.63
+     211.3      27.31      25.25      30.27       29.4      24.13      23.46
+     237.1      22.03      20.48      25.88      25.14      18.73      18.18
+     266.1      18.07       16.9      21.44      20.76      14.86      14.29
+     298.5      14.41      13.39      17.33      16.93      11.55       11.2
+       335      11.73      10.69      14.17      13.66      9.194      8.731
+     375.8      9.328      8.446      11.19      10.93       7.02      6.797
+     421.7      7.366      6.659      8.932      8.667      5.495      5.199
+     473.2      5.699      5.128      7.091      6.884      4.188      4.006
+     530.9      4.562      4.017       5.55      5.414      3.178      3.046
+     595.7      3.477      3.063      4.383      4.254      2.406      2.282
+     668.3        2.7      2.363       3.42      3.312      1.805      1.703
+     749.9      2.088      1.792      2.662       2.58      1.334      1.274
+     841.4      1.603      1.348      2.069      1.998      1.006     0.9519
+     944.1      1.193      1.026      1.603      1.537     0.7457     0.7133
+      1059      0.907     0.7587      1.227      1.185     0.5513     0.5228
+      1188     0.6826     0.5751      0.933      0.903     0.4018     0.3812
+      1334     0.5139      0.422     0.7036     0.6828     0.2939     0.2828
+      1496     0.3848     0.3141     0.5309     0.5094     0.2174     0.2048
+      1679       0.28     0.2287     0.3991     0.3828     0.1581     0.1497
+      1884     0.2049     0.1645     0.2992     0.2852     0.1167     0.1127
+      2114     0.1507     0.1224     0.2215     0.2127    0.08437    0.08247
+      2371      0.109    0.08781     0.1642     0.1535    0.06156    0.05794
+      2661    0.08068    0.06351     0.1194      0.115    0.04385    0.04373
+      2985    0.05786    0.04496    0.08834    0.08457    0.03243    0.02999
+      3350    0.04134     0.0322    0.06537    0.05984    0.02374    0.02266
+      3758    0.02891    0.02288    0.04768    0.04467    0.01708    0.01604
+      4217    0.02085    0.01628    0.03491    0.03152    0.01284     0.0116
+      4732    0.01506    0.01164    0.02536    0.02314   0.008933   0.008413
+      5309      0.011   0.008069    0.01865    0.01661   0.005925   0.005687
+      5957   0.007738   0.005625    0.01289    0.01217    0.00413   0.004046
+      6683   0.005521   0.004016   0.009316   0.008403    0.00325   0.003145
+      7499   0.003893   0.002683   0.006806   0.006038   0.002367   0.002255
+      8414   0.002673   0.001833   0.005014     0.0043   0.001674   0.001483
+      9441   0.001766   0.001296   0.003724    0.00316    0.00112  0.0009962
+"""
diff --git a/utils/sddm/renormalize.py b/utils/sddm/renormalize.py
new file mode 100644
index 0000000..24e539f
--- /dev/null
+++ b/utils/sddm/renormalize.py
@@ -0,0 +1,128 @@
+from __future__ import print_function, division
+import numpy as np
+from os.path import split
+import gzip
+from . import *
+import pandas as pd
+from .flux_data import OSCILLATED_FLUX_DATA
+from io import StringIO
+
+def read_mcpl(filename):
+    """
+    Reads in an MCPL file and returns a dictionary whose keys are event numbers
+    and whose values are 2D arrays of the MCPL data.
+
+    The columns of the data array are:
+
+        1.  Number of particles in event
+        2.  SNOMAN particle ID
+        3.  X position (cm)
+        4.  Y position (cm)
+        5.  Z position (cm)
+        6.  Time
+        7.  Energy (MeV)
+        8.  X direction
+        9.  Y direction
+        10. Z direction
+        11. Neutrino Energy (MeV)
+        12. X Neutrino direction (cm)
+        13. Y Neutrino direction (cm)
+        14. Z Neutrino direction (cm)
+        15. Neutrino flavor
+        16. Interaction Code
+        17. X Neutrino position (cm)
+        18. Y Neutrino position (cm)
+        19. Z Neutrino position (cm)
+
+    Note: You need to subtract 1 from the numbers above to get the index.
+    """
+    data = np.genfromtxt(filename,skip_header=1)
+    rv = {}
+    n = 1
+    i = 1
+    with gzip.open(filename,"r") as f:
+        lines = f.readlines()
+        nevents = int(lines[0].split()[0])
+        while True:
+            nparticles = int(data[i-1,0])
+            rv[n] = np.genfromtxt(lines[i:i+nparticles])
+            n += 1
+            i += nparticles
+
+            if i - 1 >= data.shape[0]:
+                break
+
+    assert nevents == len(rv)
+
+    return rv
+
+def add_flux_weight(mcpl):
+    x, nue, nbe, num, nbm, nut, nbt = np.loadtxt(StringIO(OSCILLATED_FLUX_DATA)).T
+
+    nut_num = nut/num
+    nbt_nbm = nbt/nbm
+
+    mcpl['flux_weight'] = 1.0
+    mcpl.loc[mcpl.nu_id == IDP_NU_TAU,'flux_weight'] = np.interp(mcpl.loc[mcpl.nu_id == IDP_NU_TAU,'nu_energy'],x,nut_num)
+    mcpl.loc[mcpl.nu_id == IDP_NU_TAU_BAR,'flux_weight'] = np.interp(mcpl.loc[mcpl.nu_id == IDP_NU_TAU_BAR,'nu_energy'],x,nbt_nbm)
+
+    return mcpl
+
+def read_mcpl_df(filename):
+    """
+    Similar to read_mcpl() but returns a dataframe instead with only one row
+    per event. The dataframe has the following columns:
+
+        1. run
+        3. evn: the event number
+        3. nu_id: the SNOMAN particle ID for the neutrino
+        4. nu_energy: the energy of the neutrino in MeV
+        5. nu_r: the radius of the neutrino interaction point
+        6. cc: is the event a CC event?
+        7. flux_weight: a weight for the event to correct for the fact that I
+           accidentally simulated the muon neutrino flux for the tau neutrino
+           flux
+    """
+    head, tail = split(filename)
+    run = int(tail.split('_')[3])
+
+    data = []
+    for evn, ev in read_mcpl(filename).iteritems():
+        ev = np.atleast_2d(ev)
+        nu = ev[0,14]
+        energy = ev[0,10]
+        r = np.linalg.norm(ev[0,16:19])
+        # Is the event a CC event?
+        cc = nu not in ev[:,1]
+        data.append((run,evn,nu,energy,r,cc))
+
+    run, evn, nu, energy, r, cc = zip(*data)
+    return add_flux_weight(pd.DataFrame({'run':run,'evn':evn,'nu_id':nu,'nu_energy':energy,'nu_r':r,'cc':cc}))
+
+
+def load_mcpl_files(filenames):
+    """
+    Load a bunch of MCPL files and return a dataframe. Similar to
+    read_mcpl_df() but for a list of filenames.
+    """
+    mcpls = []
+    print("loading mcpl files")
+    for filename in filenames:
+        try:
+            mcpl = read_mcpl_df(filename)
+        except Exception as e:
+            print_warning("failed to read mcpl file '%s'" % filename)
+        mcpls.append(mcpl)
+
+    print("done")
+    return pd.concat(mcpls)
+
+def renormalize_data(data, mcpl):
+    """
+    Basically just merges the dataframe returned by load_mcpl_files() with the
+    MC dataframe so that the "flux_weight" column is available to renormalize
+    the tau neutrino data.
+    """
+    data = pd.merge(data,mcpl,on=['run','evn'])
+    data['flux_weight'] = data['flux_weight'].fillna(1.0)
+    return data
author	tlatorre <tlatorre@uchicago.edu>	2020-12-15 14:09:26 -0600
committer	tlatorre <tlatorre@uchicago.edu>	2020-12-15 14:09:26 -0600
commit	be3b6e19510ddee930df1d9435368d66c18da551 (patch)
tree	1248fb42e0f6ae40626b4f2dc228481a05a75f7b /utils
parent	5e63a701a8b144093528c063856d513be92a37ed (diff)
download	sddm-be3b6e19510ddee930df1d9435368d66c18da551.tar.gz sddm-be3b6e19510ddee930df1d9435368d66c18da551.tar.bz2 sddm-be3b6e19510ddee930df1d9435368d66c18da551.zip