#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import print_function, division
import yaml
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml.loader import SafeLoader as Loader
import numpy as np
from scipy.stats import iqr
from matplotlib.lines import Line2D

# on retina screens, the default plots are way too small
# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
# Qt5 will scale everything using the dpi in ~/.Xresources
import matplotlib
matplotlib.use("Qt5Agg")

SNOMAN_MASS = {
    20: 0.511,
    21: 0.511,
    22: 105.658,
    23: 105.658
}

def plot_hist(x, label=None):
    # determine the bin width using the Freedman Diaconis rule
    # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
    h = 2*iqr(x)/len(x)**(1/3)
    n = max(int((np.max(x)-np.min(x))/h),10)
    bins = np.linspace(np.min(x),np.max(x),n)
    plt.hist(x, bins=bins, histtype='step', label=label)

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

if __name__ == '__main__':
    import argparse
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    args = parser.parse_args()

    fit_results = []

    for filename in args.filenames:
        print(filename)
        with open(filename) as f:
            data = yaml.load(f.read(),Loader=Loader)

        for i, event in enumerate(data['data']):
            for ev in event['ev']:
                if 'fit' not in ev:
                    continue
                for id, fit_result in ev['fit'].iteritems():
                    # FIXME: Should I just store the particle ids in the YAML
                    # output as a list of particle ids instead of a single
                    # integer?
                    ids = map(int,chunks(str(id),2))
                    energy = 0.0
                    for i, ke in zip(ids,np.atleast_1d(fit_result['energy'])):
                        energy += ke + SNOMAN_MASS[i]
                    fit_results.append((
                        ev['run'],
                        ev['gtid'],
                        id,
                        fit_result['posx'],
                        fit_result['posy'],
                        fit_result['posz'],
                        fit_result['t0'],
                        energy,
                        fit_result['fmin']))

    # create a dataframe
    # note: we have to first create a numpy structured array since there is no
    # way to pass a list of data types to the DataFrame constructor. See
    # https://github.com/pandas-dev/pandas/issues/4464
    array = np.array(fit_results,
                     dtype=[('run',np.int),      # run number
                            ('gtid',np.int),     # gtid
                            ('id',np.int),       # particle id
                            ('x', np.double),    # x
                            ('y',np.double),     # y
                            ('z',np.double),     # z
                            ('t0',np.double),    # t0
                            ('ke',np.double),    # kinetic energy
                            ('fmin',np.double)]  # negative log likelihood
                   )
    df = pd.DataFrame.from_records(array)

    # get the best fit
    df = df.sort_values('fmin').groupby(['run','gtid']).first()

    for id, df_id in df.groupby('id'):
        plt.figure()
        plot_hist(df_id.ke.values)
        plt.xlabel("Energy (MeV)")
        plt.title(str(id))
    plt.show()