import matplotlib
matplotlib.use('Agg') 
import matplotlib.pyplot as plt

from os import makedirs
from os.path import join, realpath, isfile, dirname
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime

import numpy as np
import pickle
from glob import glob
from natsort import natsorted
from joblib import Parallel, delayed

from main_random_walk import MainRW
from rw_conventional import RWConventional
from calc_rates import CalcRates
from plot_rw_results import PlotRW

EXP_FLUX = np.loadtxt('/dickson/s2/jim/repos/wepy_async/expected_flux_peaked_prob_40_states.txt')

def get_n_cycles(frames, total_cycles):
    """Calculate the minimum number of cycles to accommodate each trajectory. 

                  Cycles ->
    Total cycles: |--------------->|
    Trajectory 1: |--->|
    Trajectory 2: |-------->|
    Trajectory 3: |----->|

    Min. cycles:  |<------->|


    Args:
        frames (list-like object with shape (N, 3)): Frame indices as (run, walker, cycle)
        total_cycles (unsigned int): The maximum number of async. cycles

    Returns:
        unsigned int: The minimum number of cycles
    """
    min_starting_cycle = frames[0][0][2]

    for i in range(1, len(frames)):
        if min_starting_cycle > frames[i][0][2]:
            min_starting_cycle = frames[i][0][2]

    n_cycles = total_cycles - min_starting_cycle

    return n_cycles


def get_init_pos_and_frame_mapping(n_reps, frames):
    """Convert 

    Args:
        n_reps (unsigned int): Number of replicates for each frame
        frames (list-like object of shape (N, 2, (1, 3))): List-like object with each entry being (initial position, frame index)

    Returns:
        2-tuple with shape (1, 3): Initial positions and frame index for each replicate
    """
    frame_mapping = []
    init_positions = np.empty((len(frames)*n_reps, 1, 1), dtype=int)

    for i in range(len(frames)):
        for j in range(n_reps):
            init_positions[i*n_reps + j, 0, 0] = frames[i][1]

            full_frame = (frames[i][0][0], frames[i][0][1], frames[i][0][2], j)
            frame_mapping.append(full_frame)

    return init_positions, frame_mapping


class Rates():
    """Store the rate calcuations for various methods by cycle"""
    def __init__(self, n_reps=None, orig_cycles=None, async_cycles=None):
        self.n_reps = n_reps
        self.orig_cycles = orig_cycles
        self.async_cycles = async_cycles
        self.rates = {}

    def add_rate(self, name, rates):
        """Add rates for a method

        Args:
            name (str): Method name, e.g. 'd_eigvec'
            rates (list): Rate at each cycle"""
        self.rates[name] = rates


def update_rate_in_list(rates_list, orig_cycles, async_cycles, n_reps, method, rate):
    """Look for the matching Rates object within the list, if it is present add the rate to the
    object, otherwise create a new Rates object and append it to the list

    Args:
        rates_list (list-like object): List containing all rates
        orig_cycles (unsigned int): Original cycle threshold
        async_cycles (unsigned int): Async. cycle threshold
        n_reps (unsigned int): Number of replicates
        method (str): Async. method, e.g. 'd_eigvec'
        rate (list-like object): The rate per cycle
    """

    found = False

    for rate_obj in rates_list:
        if rate_obj.orig_cycles == orig_cycles and rate_obj.async_cycles == async_cycles and rate_obj.n_reps == n_reps:
            rate_obj.add_rate(method, rate)
            found = True
            break

    if not found:
        new_rate_obj = Rates(n_reps=n_reps, orig_cycles=orig_cycles, async_cycles=async_cycles)
        new_rate_obj.add_rate(method, rate)
        rates_list.append(new_rate_obj)


def process_run(run):
    """Run Asynchronous wepy for a single run. This will get the results of
    the wepy simulation from a wepy HDF5 file, calculate the starting points
    using the asynchronous methods, run the trajectories and calculate the
    unbinding rate

    Args:
        run (unsigned int): The run number to process

    Returns:
        final_rates: The rate at the last cycle
        async_starting_points_pos: The starting positions selected for the async. trajectories
        async_starting_points_weights: The weights for the selected async. trajectories
        all_rates: Rates at each cycle for each async method, the random method and the original rates
    """
    print('Starting run', run)

    n_entries = 0
    for revo_max_cycles in REVO_MAX_CYCLES:
        for async_max_cycles in ASYNC_MAX_CYCLES:
            if revo_max_cycles >= async_max_cycles:
                n_entries += 1

    final_rates = {method:np.ones(n_entries) for method in METHODS}
    final_rates['revo'] = np.ones(n_entries)
    final_rates['random'] = np.ones(n_entries)

    entry = {method:0 for method in METHODS}
    entry['revo'] = 0
    entry['random'] = 0

    run_path = join(ALL_RUNS_PATH, f'rw_{run}')

    async_starting_points_weights = []
    async_starting_points_pos = []

    all_rates = []

    for revo_max_cycles in REVO_MAX_CYCLES:
        MAIN_RW_DIR = join(run_path, f'{revo_max_cycles}_revo_cycles')
        makedirs(MAIN_RW_DIR, exist_ok=True)

        MAIN_RW_PATH = join(MAIN_RW_DIR, 'main_rw.pkl')

        # To speed up reptitive processing, the results for the given parameter
        # set are saved to disk after each step so that they can be loaded as necessary
        try:
            # TODO Find a way to save these intermediate results
            #with open(MAIN_RW_PATH, 'rb') as f:
                #rw = pickle.load(f)
            raise FileNotFoundError
        except FileNotFoundError:
            # Build a CSN from the original Wepy HDF5 file. The CSN contains
            # member variables that will be used later, such as the transition
            # matrix and committors
            rw = MainRW(input_dir=dirname(hdf5_filenames[run]),
                        output_dir=MAIN_RW_DIR,
                        max_cycles=revo_max_cycles,
                        input_file=args.input_file,
                        input_path=hdf5_filenames[run])
            rw.get_csn(n_clusters=args.n_clusters)

            #with open(MAIN_RW_PATH, 'wb') as f:
                #pickle.dump(rw, f)

        # METHODS are the async. methods used to select the starting points for
        # additional sampling {d_eigvec, d_committor, max_flux}
        for method in METHODS:
            # TODO This results in the revo and random results being overwritten with each subsequent async. method
            METHOD_DIR = join(MAIN_RW_DIR, str(method))
            makedirs(METHOD_DIR, exist_ok=True)

            for async_max_cycles in ASYNC_MAX_CYCLES:
                # TODO Do the weights for new trajectories make sense if they run for more cycles than the revo simulation?
                if async_max_cycles > revo_max_cycles:
                    continue

                print('Getting starting frames')
                ASYNC_CYCLES_PATH = join(METHOD_DIR, 'async_cycles.pkl')
                ASYNC_BEST_FRAMES_WEIGHTS_PATH = join(METHOD_DIR, 'best_rw_states_weights.txt')

                try:
                    #raise FileNotFoundError

                    with open(ASYNC_CYCLES_PATH, 'rb') as f:
                        best_frames, random_frames = pickle.load(f)

                    np.loadtxt(ASYNC_BEST_FRAMES_WEIGHTS_PATH)
                    
                except FileNotFoundError:
                    # Get the starting frames to be used for additional sampling. 'best_frames' are selected using the async. method, 'random_frames' are randomly chosen
                    best_frames, random_frames = rw.get_states(n_async_clusters=args.n_async_clusters,
                                                               eigvals=args.eigvals,
                                                               n_async_frames_per_cluster=args.n_async_frames_per_cluster,
                                                               method=method,
                                                               committors=args.committors,
                                                               min_committor=args.min_committor,
                                                               output_dir=METHOD_DIR)

                    with open(ASYNC_CYCLES_PATH, 'wb') as f:
                        pickle.dump((best_frames, random_frames), f)

                # Plot the weights of the selected frames as well as the expected weight for each position
                pos = [entry[1] for entry in best_frames]
                async_starting_points_pos.append(pos)
                best_frames_weights = np.loadtxt(ASYNC_BEST_FRAMES_WEIGHTS_PATH)
                async_starting_points_weights.append(best_frames_weights)
                states = np.arange(len(EXP_FLUX))
                #weight_ratios = [warped_weights[i] / exp_flux[warp_pos_prev[i]] for i in range(len(warp_pos_prev))]

                plt.plot(states, EXP_FLUX, label='Expected Weight For Position')
                plt.plot(pos, best_frames_weights, marker='o', linestyle='None', label='Starting Positions')
                plt.yscale('log')
                plt.title(f'Async Starting Points - {method}, {revo_max_cycles} REVO Cycles')
                plt.xlabel('x')
                plt.ylabel('Weight')
                plt.legend()
                plt.savefig(join(METHOD_DIR, 'async_vs_expected_weights.png'))
                plt.clf()

                # Create a mapping between frame indices and the walker in the async. Wepy HDF5 file
                n_cycles = get_n_cycles(best_frames, revo_max_cycles)
                init_positions, best_frame_mapping = get_init_pos_and_frame_mapping(max(args.n_async_reps), best_frames)

                print('Running async. trajectories')
                ASYNC_CYCLES_DIR = join(METHOD_DIR, f'{async_max_cycles}_async_cycles')
                makedirs(ASYNC_CYCLES_DIR, exist_ok=True)

                # Run the async. trajectories
                ASYNC_RW_HDF5_PATH = join(ASYNC_CYCLES_DIR, 'rw_async_results.wepy.h5')
                #if True or not isfile(ASYNC_RW_HDF5_PATH):
                if not isfile(ASYNC_RW_HDF5_PATH):
                    print('Running async. trajectories to save in', ASYNC_RW_HDF5_PATH)
                    RWConventional.run(n_runs=1,
                                       n_cycles=n_cycles,
                                       n_walkers=args.n_async_clusters * args.n_async_frames_per_cluster * max(args.n_async_reps),
                                       n_dimension=1,
                                       boundary=args.n_states - 2,
                                       probability_file=args.probability_file,
                                       init_positions=init_positions,
                                       outputs_dir=ASYNC_CYCLES_DIR,
                                       output_filename='rw_async_results.wepy')

                n_cycles = get_n_cycles(random_frames, revo_max_cycles)
                init_positions, random_frame_mapping = get_init_pos_and_frame_mapping(max(args.n_async_reps), random_frames)

                RANDOM_RW_HDF5_PATH = join(ASYNC_CYCLES_DIR, 'rw_random_results.wepy.h5')

                # Run the random trajectories
                #if True or not isfile(RANDOM_RW_HDF5_PATH):
                if not isfile(RANDOM_RW_HDF5_PATH):
                    print('Running random trajectories to save in', RANDOM_RW_HDF5_PATH)
                    RWConventional.run(n_runs=1,
                                       n_cycles=n_cycles,
                                       n_walkers=args.n_async_clusters * args.n_async_frames_per_cluster * max(args.n_async_reps),
                                       n_dimension=1,
                                       boundary=args.n_states - 2,
                                       probability_file=args.probability_file,
                                       init_positions=init_positions,
                                       outputs_dir=ASYNC_CYCLES_DIR,
                                       output_filename='rw_random_results.wepy')

                # Calculate rates for varying numbers of replicates
                print('Calculating rates')
                for n_rep in args.n_async_reps:
                    N_REP_DIR = join(ASYNC_CYCLES_DIR, f'{n_rep}_reps')
                    makedirs(N_REP_DIR, exist_ok=True)
                    ORIG_RATES_PATH = join(N_REP_DIR, f'orig_rates.npy')
                    ADJ_RATES_PATH = join(N_REP_DIR, f'adj_rates.npy')
                    RANDOM_RATES_PATH = join(N_REP_DIR, f'random_rates.npy')

                    try:
                        orig_rates = np.load(ORIG_RATES_PATH)
                        adj_rates = np.load(ADJ_RATES_PATH)
                        random_rates = np.load(RANDOM_RATES_PATH)
                    except FileNotFoundError:
                        # Calculate the rates
                        orig_rates, adj_rates, random_rates = CalcRates.run(args.output_dir,
                                                                            method,
                                                                            n_rep,
                                                                            max_cycles=revo_max_cycles,
                                                                            orig_max_cycles=revo_max_cycles,
                                                                            async_max_cycles=async_max_cycles,
                                                                            async_hdf5_path=ASYNC_RW_HDF5_PATH,
                                                                            random_hdf5_path=RANDOM_RW_HDF5_PATH,
                                                                            best_frame_mapping=best_frame_mapping,
                                                                            random_frame_mapping=random_frame_mapping,
                                                                            input_hdf5_path=hdf5_filenames[run],
                                                                            testing=args.testing)

                        adj_rates = adj_rates[:async_max_cycles]
                        random_rates = random_rates[:async_max_cycles]

                        np.save(ORIG_RATES_PATH, orig_rates)
                        np.save(ADJ_RATES_PATH, adj_rates)
                        np.save(RANDOM_RATES_PATH, random_rates)

                    print(f'Run {run} REVO min: {np.min(orig_rates)}, max: {np.max(orig_rates)}')

                    update_rate_in_list(all_rates, revo_max_cycles, async_max_cycles, n_rep, method='revo', rate=PlotRW.get_running_avg(orig_rates))
                    update_rate_in_list(all_rates, revo_max_cycles, async_max_cycles, n_rep, method=method, rate=PlotRW.get_running_avg(adj_rates))
                    update_rate_in_list(all_rates, revo_max_cycles, async_max_cycles, n_rep, method='random', rate=PlotRW.get_running_avg(random_rates))

                    plot = PlotRW(N_REP_DIR, committor_type='peaked', fig_index=run)
                    plot.running_avg_unbinding_rate(orig=orig_rates, best=adj_rates, random=random_rates, run=run)
                    plot.error_by_run(orig=orig_rates, best=adj_rates, random=random_rates, run=run)
                    print('Saved plot')

                    # Store final rates for plotting
                    # TODO Work with multiple numbers of replicates?
                    if n_rep == max(args.n_async_reps):
                        running_avg = plot.get_running_avg(adj_rates)
                        final_rates[method][entry[method]] = running_avg[-1]
                        entry[method] += 1

                        if entry['revo'] < final_rates['revo'].shape[0]:
                            running_avg = plot.get_running_avg(orig_rates)
                            final_rates['revo'][entry['revo'] % n_entries] = running_avg[-1]
                            entry['revo'] += 1

                        if entry['random'] < final_rates['random'].shape[0]:
                            running_avg = plot.get_running_avg(random_rates)
                            final_rates['random'][entry['random'] % n_entries] = running_avg[-1]
                            entry['random'] += 1

    
    return final_rates, async_starting_points_pos, async_starting_points_weights, all_rates


def get_boxplot_vals(n_runs, all_rates, method, plot_cycles_max=100, step_interval=5):
    """Get values for boxplot

    Args:
        n_runs (unsigned int): Number of runs
        all_rates (list-like object): The rates by cycle
        method (str): The method that is being calculated
        plot_cycles_max (int, optional): Cycle threshold to plot. Defaults to 100.
        step_interval (int, optional): Interval of steps to plot. Defaults to 5.

    Returns:
        _type_: _description_
    """
    PLOT_CYCLES = np.arange(start=0, stop=plot_cycles_max, step=step_interval)

    # Store with shape (runs, instances, 10 values)
    rates = np.empty((len(hdf5_filenames), len(all_rates[0]), len(PLOT_CYCLES)))

    for run in range(n_runs):
        for i in range(len(all_rates[0])):
            for revo_j, rates_j in enumerate(range(0, plot_cycles_max, step_interval)):
                rates[run,i,revo_j] = all_rates[run][i].rates[method][rates_j]

    vals = []
    labels = []
    y_min = 1
    y_max = 0
    for i in range(len(PLOT_CYCLES)):
        val = (np.ravel(rates[:,:,i]))
        val = [element for element in val if element != 0]

        if len(val) > 0:
            y_min = np.min((y_min, np.min(val)))
            y_max = np.max((y_max, np.max(val)))

        vals.append(val)

        if method == 'revo':
            labels.append(str(10*PLOT_CYCLES[i]))
        else:
            labels.append(str(10*PLOT_CYCLES[i] + 10*plot_cycles_max))

    # Get y-axis bounds for log plot
    y_range = (y_min / 10, y_max * 10)
    y_range = (10**np.floor(np.log10(y_range[0])), 10**np.ceil(np.log10(y_range[1])))

    return vals, labels, y_range


if __name__ == '__main__':
    def get_inputs():
        """Get command line arguments from the user"""
        parser = ArgumentParser()

        parser.add_argument('-i', dest='input_dir', default=None, help='Input directory')
        parser.add_argument('-o', dest='output_dir', help='Output directory')
        parser.add_argument('-c', dest='n_clusters', type=int, default=100, help='Number of clusters to use')
        parser.add_argument('-b', dest='eigv_begin', type=int, default=3, help='Beginning eigenvalue to use for scoring')
        parser.add_argument('-e', dest='eigv_end', type=int, default=6,
                            help='Ending eigenvalue to use for scoring (inclusive)')
        parser.add_argument('-a', dest='clustering_alg', default='MinibatchKMeans', help='Clustering alorithm to use')
        parser.add_argument('-m', dest='method', default='d_committors', help='Method used to select points for additional sampling, i.e. "d_committors", "flux" or "d_eigvec"')

        parser.add_argument('--n_async_clusters', dest='n_async_clusters', type=int, default=3, help='Number of async clusters to select for additional sampling')
        parser.add_argument('--n_async_frames_per_cluster', dest='n_async_frames_per_cluster', type=int, default=0, help='Number of frames to select from each cluster')
        parser.add_argument('--n_async_reps', dest='n_async_reps', nargs='+', type=int, default=1, help='Number of replicates to run')

        parser.add_argument('--max_cycles', dest='max_cycles', type=int, default=None, help='Maximum cycle to consider for launching new trajectories, default uses all available cycles')
        parser.add_argument('--committors', dest='committors', default=None, help='Path to the committors file')
        parser.add_argument('--input_file', dest='input_file', default=None, help='Path to a file listing the paths to each WepyHDF5 file to use as inputs')
        parser.add_argument('--min_committor', dest='min_committor', type=float, default=0.001, help='Minimum committor value to use (values lower than this will be set to zero')
        parser.add_argument('--eigvals', dest='eigvals', type=int)
        parser.add_argument('--states', dest='n_states', type=int, default=20, help='Number of states')
        parser.add_argument('--prob_file', dest='probability_file', help='File containing forward probability for each state')
        parser.add_argument('--testing', dest='testing', action='store_true', default=False)
        parser.add_argument('--debugging', dest='debugging', action='store_true', default=False)
        parser.add_argument('--new_hdf5_dir', dest='new_hdf5_dir', default=None, help='Input directory of new wepy HDF5 files')

        args = parser.parse_args()

        if args.eigv_begin == args.eigv_end:
            args.eigvals = args.eigv_begin
        else:
            args.eigvals = np.arange(args.eigv_begin, args.eigv_end + 1)

        args.probability_file = realpath(args.probability_file)

        if args.input_file is not None:
            args.input_file = realpath(args.input_file)

        if args.new_hdf5_dir is not None:
            args.new_hdf5_dir = realpath(args.new_hdf5_dir)
        
        if type(args.n_async_reps) is not list:
            args.n_async_reps = list(args.n_async_reps)

        return args

    print('Starting at', datetime.now())
    args = get_inputs()

    # TODO Use this as a flag to calculate all values from scratch?
    OVERWRITE = False

    # This is the expected value as calculated in rw_prob_peak.py
    EXP_FLUX_CONVERGED = 9.004059477104087e-10

    if args.input_file is None:
        # Using natural sorting so that filenames are listed in the correct order
        # based on their name, e.g. file 1, file 2, ..., file 10
        hdf5_filenames = natsorted(glob(join(args.input_dir, '**/*.h5'), recursive=True))
    else:
        hdf5_filenames = pickle.load(open(args.input_file, 'rb'))

    if args.debugging:
        hdf5_filenames = hdf5_filenames[:2]
        REVO_MAX_CYCLES = (args.max_cycles,)
        ASYNC_MAX_CYCLES = (min(args.max_cycles, 100),)
        METHODS = ('flux', 'd_eigvec', 'd_committors')
    else:
        REVO_MAX_CYCLES = (args.max_cycles,)
        ASYNC_MAX_CYCLES = (min(args.max_cycles, 100),)
        METHODS = ('flux', 'd_eigvec', 'd_committors')

    plot_colors = {'revo':'tab:blue',
                   'random':'tab:brown',
                   'flux':'tab:orange',
                   'd_eigvec':'tab:green',
                   'd_committors':'tab:purple'}

    revo_cycles = []
    async_cycles = []

    ALL_RUNS_PATH = join(args.output_dir, 'runs')

    ALL_FINAL_RATES_PATH = join(args.output_dir, 'all_final_rates.pkl')
    ALL_RATES_PATH = join(args.output_dir, 'all_rates.pkl')
    ASYNC_POINTS = join(args.output_dir, 'async_starting_points.npz')
    try:
        #raise FileNotFoundError

        with open(ALL_FINAL_RATES_PATH, 'rb') as f:
            all_final_rates = pickle.load(f)

        with open(ALL_RATES_PATH, 'rb') as f:
            all_rates = pickle.load(f)

        # Attempt to find the results for the given parameter set. If they're available load them
        # from the pickle files, otherwise begin calculating them
        found = False
        for rates in all_rates[0]:
            if rates.orig_cycles == REVO_MAX_CYCLES[0] and rates.async_cycles == ASYNC_MAX_CYCLES[0] and rates.n_reps == args.n_async_reps[0]:
                found = True
                break

        if not found:
            raise FileNotFoundError

        async_points = np.load(ASYNC_POINTS)
        async_pos = async_points['async_pos']
        async_weights = async_points['async_weights']
    except FileNotFoundError:
        # If debugging, process runs iteratively to make debugging easier. Otherwise process runs in parallel
        if args.debugging:
            results = [process_run(i) for i in range(len(hdf5_filenames))]
        else:
            N_JOBS = min(50, len(hdf5_filenames))
            results = Parallel(n_jobs=N_JOBS, backend='multiprocessing')(delayed(process_run)(i) for i in range(len(hdf5_filenames)))

        all_final_rates = []
        all_rates = []
        async_pos = []
        async_weights = []

        # The results are stored as a list in order of run, e.g. [[run 0 results], [run 1 results], [run N results]...]
        # This will store the results by type in order of run
        for i in range(len(results)):
            all_final_rates.append(results[i][0])
            async_pos.append(results[i][1])
            async_weights.append(results[i][2])
            all_rates.append(results[i][3])

        with open(ALL_FINAL_RATES_PATH, 'wb') as f:
            pickle.dump(all_final_rates, f)

        with open(ALL_RATES_PATH, 'wb') as f:
            pickle.dump(all_rates, f)

        np.savez(ASYNC_POINTS, async_pos=async_pos, async_weights=async_weights)

    # Plot the unbinding rate every 10 cycles as a box-and-whisker plot
    interval = ASYNC_MAX_CYCLES[0] // 10
    #revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=100, step_interval=5)
    #revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=min(args.max_cycles, 100), step_interval=5)
    revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
    plt.boxplot(revo_vals, labels=revo_labels, showmeans=True)
    plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
    plt.grid(axis='y')
    plt.legend(loc='lower right')
    plt.xticks(rotation=45)
    plt.yscale('log')
    plt.ylim(revo_y_range)
    plt.title(f'REVO 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
    plt.xlabel('Steps')
    plt.ylabel('Unbinding Rate')
    plt.tight_layout()
    plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_revo_every_10_cycles.png'))
    plt.clf()

    #flux_vals, flux_labels, flux_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='flux', plot_cycles_max=min(args.max_cycles, 100), step_interval=5)
    flux_vals, flux_labels, flux_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='flux', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
    plt.boxplot(flux_vals, labels=flux_labels, showmeans=True)
    plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
    plt.grid(axis='y')
    plt.legend(loc='lower right')
    plt.xticks(rotation=45)
    plt.yscale('log')
    plt.ylim(flux_y_range)
    plt.title(f'REVO + Max. Flux 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
    plt.xlabel('Steps')
    plt.ylabel('Unbinding Rate')
    plt.tight_layout()
    plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_flux_every_10_cycles.png'))
    plt.clf()

    d_eigvec_vals, d_eigvec_labels, d_eigvec_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='d_eigvec', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
    plt.boxplot(d_eigvec_vals, labels=d_eigvec_labels, showmeans=True)
    plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
    plt.grid(axis='y')
    plt.legend(loc='lower right')
    plt.xticks(rotation=45)
    plt.yscale('log')
    plt.ylim(d_eigvec_y_range)
    plt.title(f'REVO + d_Eigvec 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
    plt.xlabel('Steps')
    plt.ylabel('Unbinding Rate')
    plt.tight_layout()
    plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_d_eigvec_every_10_cycles.png'))
    plt.clf()

    random_vals, random_labels, random_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='random', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
    plt.boxplot(random_vals, labels=random_labels, showmeans=True)
    plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
    plt.grid(axis='y')
    plt.legend(loc='lower right')
    plt.xticks(rotation=45)
    plt.yscale('log')
    plt.ylim(random_y_range)
    plt.title(f'REVO + Random 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
    plt.xlabel('Steps')
    plt.ylabel('Unbinding Rate')
    plt.tight_layout()
    plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_random_every_10_cycles.png'))
    plt.clf()

    #revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=args.max_cycles, step_interval=500)
    revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=args.max_cycles, step_interval=interval)

    revo_mean = [np.mean(revo_vals[i]) for i in range(len(revo_vals))]

    plt.boxplot(revo_vals, labels=revo_labels, showmeans=True)
    #plt.plot(revo_mean, labels=revo_labels, markers='_', color='red')
    #plt.plot(revo_mean, labels=revo_labels)
    plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
    plt.legend(loc='lower right')
    plt.xticks(rotation=45)
    plt.yscale('log')
    plt.ylim(revo_y_range)
    plt.title(f'REVO 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
    plt.xlabel('Steps')
    plt.ylabel('Unbinding Rate')
    plt.tight_layout()
    #plt.savefig(join(args.output_dir, 'revo_every_20_cycles.png'))
    plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_revo_every_20_cycles.png'))
    plt.clf()
    
    plt.plot(np.arange(len(EXP_FLUX)), EXP_FLUX, label='Expected Weight For Position')
    plt.plot(np.ravel(async_pos), np.ravel(async_weights), marker='o', linestyle='None', label='Starting Positions')
    plt.yscale('log')
    plt.title('Async Starting Points')
    plt.xlabel('x')
    plt.ylabel('Weight')
    plt.legend()
    #plt.savefig(join(args.output_dir, 'async_vs_expected_weights.png'))
    plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_vs_expected_weights.png'))
    plt.clf()

    N_RUNS = len(hdf5_filenames)

    # Calculate average unbinding rates for all methods and parameters
    avg_final_rates = deepcopy(all_final_rates[0])
    for run in range(1, N_RUNS):
        for method in avg_final_rates:
           avg_final_rates[method] += all_final_rates[run][method]

    denom = np.full_like(avg_final_rates['revo'], N_RUNS)
    for method in avg_final_rates:
        avg_final_rates[method] /= denom

    final_rates = avg_final_rates

    plot = PlotRW(args.output_dir, committor_type='peaked', fig_index=N_RUNS)
    max_rate = plot.exp_flux_converged
    min_rate = plot.exp_flux_converged

    # Set all values of 0 to 'None' for plotting
    for method in final_rates.keys():
        for i in range(len(final_rates[method])):
            if final_rates[method][i] == 0:
                final_rates[method][i] = np.nan

        max_rate = max(max_rate, np.nanmax(final_rates[method]))
        min_rate = min(min_rate, np.nanmin(final_rates[method]))

    plt.axhline(plot.exp_flux_converged, linestyle='--', label='Expected value at steady state')
    plt.legend()
    plt.xscale('log')
    y_min = 10**np.floor(np.log10(min_rate))
    y_max = 10**np.ceil(np.log10(max_rate))
    plt.ylim([y_min, y_max])
    plt.yscale('log')
    plt.xlabel('Total Steps (REVO + Async. Method)')
    plt.ylabel('Avg. Unbinding Rate by Run (50 Runs)')
    FINAL_RATES_PATH = join(args.output_dir, 'final_rates.png')
    plt.savefig(FINAL_RATES_PATH)

    #N_SAMPLES = len(REVO_MAX_CYCLES) * len(ASYNC_MAX_CYCLES)
    N_SAMPLES = 0
    for revo_max_cycles in REVO_MAX_CYCLES:
        for async_max_cycles in ASYNC_MAX_CYCLES:
            if revo_max_cycles > async_max_cycles:
                N_SAMPLES += 1

    N_REVO_WALKERS = 48
    # TODO Why is args.n_async_reps a list?
    N_ASYNC_WALKERS = args.n_async_clusters * args.n_async_frames_per_cluster * args.n_async_reps[0]

    for method in final_rates.keys():
        # REVO simulations are averaged for a given parameter set and plotted once
        if method == 'revo':
            label = method
            x_axis = [cycles * 10 * N_REVO_WALKERS for cycles in REVO_MAX_CYCLES]
            revo_avg = np.zeros(len(REVO_MAX_CYCLES))
            counts = np.zeros(len(REVO_MAX_CYCLES))

            n_entry = 0
            for i, revo_max_cycles in enumerate(REVO_MAX_CYCLES):
                for j, async_max_cycles in enumerate(ASYNC_MAX_CYCLES):
                    if async_max_cycles <= revo_max_cycles:
                        revo_avg[i] += final_rates[method][n_entry]
                        counts[i] += 1

                        n_entry += 1

            revo_avg /= counts

            if method in plt.gca().get_legend_handles_labels()[1]:
                label = ''
            else:
                label = method

            #plt.plot(x_axis, revo_avg, color=plot_colors[method], label=label)
            plt.plot(x_axis, revo_avg, color=plot_colors[method], label=label)
            plt.savefig(FINAL_RATES_PATH, bbox_extra_artists=(legend,), bbox_inches='tight')
        else:
            n_entry = 0
            for i, revo_max_cycles in enumerate(REVO_MAX_CYCLES):
                vals = []
                x_axis = []
                for j, async_max_cycles in enumerate(ASYNC_MAX_CYCLES):
                    if async_max_cycles <= revo_max_cycles:
                        #label = f'{method} ({revo_max_cycles*10} revo steps)'
                        if method in plt.gca().get_legend_handles_labels()[1]:
                            label = ''
                        else:
                            label = method

                        x_axis.append((revo_max_cycles + async_max_cycles) * 10 * N_ASYNC_WALKERS)
                
                        vals.append(final_rates[method][n_entry])
                        n_entry += 1

                if len(vals) == 1:
                    plt.plot(x_axis, vals, marker='o', color=plot_colors[method], label=label)
                else:
                    plt.plot(x_axis, vals, color=plot_colors[method], label=label)

                legend = plt.legend(bbox_to_anchor=(1.1, 1.1))
                plt.savefig(FINAL_RATES_PATH, bbox_extra_artists=(legend,), bbox_inches='tight')
                #plt.legend()
                #plt.savefig(FINAL_RATES_PATH)


    if False:
        plt.axhline(plot.exp_flux_converged, linestyle='--', label='Expected value at steady state')
        plt.legend()
        #plt.xscale('log')
        y_min = 10**np.floor(np.log10(min_rate))
        y_max = 10**np.ceil(np.log10(max_rate))
        plt.ylim([y_min, y_max])
        plt.yscale('log')
        plt.xlabel('Total Steps (REVO + Async. Method)')
        plt.ylabel('Avg. Unbinding Rate by Run (50 Runs)')
        FINAL_RATES_PATH = join(args.output_dir, 'final_rates.png')

    #plt.savefig(FINAL_RATES_PATH)
    plt.savefig(FINAL_RATES_PATH, bbox_extra_artists=(legend,), bbox_inches='tight')
    plt.clf()

    print('Run finished')