import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from os import makedirs
from os.path import join, realpath, isfile, dirname
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
import numpy as np
import pickle
from glob import glob
from natsort import natsorted
from joblib import Parallel, delayed
from main_random_walk import MainRW
from rw_conventional import RWConventional
from calc_rates import CalcRates
from plot_rw_results import PlotRW
EXP_FLUX = np.loadtxt('/dickson/s2/jim/repos/wepy_async/expected_flux_peaked_prob_40_states.txt')
def get_n_cycles(frames, total_cycles):
    """Calculate the minimum number of cycles to accommodate each trajectory.

    Cycles ->
    Total cycles: |--------------->|
    Trajectory 1:     |--->|
    Trajectory 2: |-------->|
    Trajectory 3:   |----->|
    Min. cycles:  |<------->|

    Args:
        frames (list-like object): Each entry is ((run, walker, cycle), position);
            only the cycle component of the frame index triple is used here
        total_cycles (unsigned int): The maximum number of async. cycles

    Returns:
        unsigned int: The minimum number of cycles, i.e. total_cycles minus the
        earliest starting cycle among all trajectories
    """
    # The trajectory that starts earliest determines how many cycles are needed
    min_starting_cycle = min(frame[0][2] for frame in frames)
    return total_cycles - min_starting_cycle
def get_init_pos_and_frame_mapping(n_reps, frames):
    """Expand each selected frame into `n_reps` replicate starting points.

    Args:
        n_reps (unsigned int): Number of replicates for each frame
        frames (list-like object): Each entry is ((run, walker, cycle), position),
            i.e. a frame index triple followed by the walker's 1D position

    Returns:
        tuple: (init_positions, frame_mapping) where init_positions is an int
        array of shape (len(frames) * n_reps, 1, 1) holding the starting
        position of every replicate, and frame_mapping is a list of
        (run, walker, cycle, rep) tuples identifying the source frame and
        replicate index for each entry
    """
    frame_mapping = []
    init_positions = np.empty((len(frames)*n_reps, 1, 1), dtype=int)
    for i, (frame_idx, position) in enumerate(frames):
        for j in range(n_reps):
            # Replicates of the same frame are laid out contiguously
            init_positions[i*n_reps + j, 0, 0] = position
            frame_mapping.append((frame_idx[0], frame_idx[1], frame_idx[2], j))
    return init_positions, frame_mapping
class Rates():
    """Per-parameter-set container for rate estimates computed by cycle.

    One instance corresponds to a single (orig_cycles, async_cycles, n_reps)
    parameter combination; the per-cycle rates for each selection method are
    stored in the ``rates`` dict keyed by method name.
    """

    def __init__(self, n_reps=None, orig_cycles=None, async_cycles=None):
        # Method name -> list of rates, one per cycle
        self.rates = dict()
        # Parameter-set identity used to look this object up in a list
        self.async_cycles = async_cycles
        self.orig_cycles = orig_cycles
        self.n_reps = n_reps

    def add_rate(self, name, rates):
        """Record the per-cycle rates produced by one selection method.

        Args:
            name (str): Method name, e.g. 'd_eigvec'
            rates (list): Rate at each cycle
        """
        self.rates[name] = rates
def update_rate_in_list(rates_list, orig_cycles, async_cycles, n_reps, method, rate):
    """Attach a rate series to the matching Rates object in the list.

    Searches ``rates_list`` for a Rates object whose parameter set matches
    (orig_cycles, async_cycles, n_reps). On a match the rate is added to that
    object; otherwise a new Rates object is created, populated and appended.

    Args:
        rates_list (list-like object): List containing all rates
        orig_cycles (unsigned int): Original cycle threshold
        async_cycles (unsigned int): Async. cycle threshold
        n_reps (unsigned int): Number of replicates
        method (str): Async. method, e.g. 'd_eigvec'
        rate (list-like object): The rate per cycle
    """
    for existing in rates_list:
        same_params = (existing.orig_cycles == orig_cycles
                       and existing.async_cycles == async_cycles
                       and existing.n_reps == n_reps)
        if same_params:
            existing.add_rate(method, rate)
            return
    # No matching parameter set yet -- start a new entry
    fresh = Rates(n_reps=n_reps, orig_cycles=orig_cycles, async_cycles=async_cycles)
    fresh.add_rate(method, rate)
    rates_list.append(fresh)
def process_run(run):
    """Run Asynchronous wepy for a single run. This will get the results of
    the wepy simulation from a wepy HDF5 file, calculate the starting points
    using the asynchronous methods, run the trajectories and calculate the
    unbinding rate

    Args:
        run (unsigned int): The run number to process

    Returns:
        final_rates: The rate at the last cycle
        async_starting_points_pos: The starting positions selected for the async. trajectories
        async_starting_points_weights: The weights for the selected async. trajectories
        all_rates: Rates at each cycle for each async method, the random method and the original rates

    Note:
        Reads module-level globals assigned in the ``__main__`` block:
        REVO_MAX_CYCLES, ASYNC_MAX_CYCLES, METHODS, ALL_RUNS_PATH, args
        and hdf5_filenames.
    """
    print('Starting run', run)
    # Count how many (revo, async) parameter combinations are valid so the
    # per-method result arrays can be pre-allocated
    n_entries = 0
    for revo_max_cycles in REVO_MAX_CYCLES:
        for async_max_cycles in ASYNC_MAX_CYCLES:
            if revo_max_cycles >= async_max_cycles:
                n_entries += 1
    # Rates are initialized to 1 (not 0), so never-filled entries stand out on a log plot
    final_rates = {method:np.ones(n_entries) for method in METHODS}
    final_rates['revo'] = np.ones(n_entries)
    final_rates['random'] = np.ones(n_entries)
    # 'entry' tracks the next free slot in each final_rates array
    entry = {method:0 for method in METHODS}
    entry['revo'] = 0
    entry['random'] = 0
    run_path = join(ALL_RUNS_PATH, f'rw_{run}')
    async_starting_points_weights = []
    async_starting_points_pos = []
    all_rates = []
    for revo_max_cycles in REVO_MAX_CYCLES:
        MAIN_RW_DIR = join(run_path, f'{revo_max_cycles}_revo_cycles')
        makedirs(MAIN_RW_DIR, exist_ok=True)
        MAIN_RW_PATH = join(MAIN_RW_DIR, 'main_rw.pkl')
        # To speed up reptitive processing, the results for the given parameter
        # set are saved to disk after each step so that they can be loaded as necessary
        try:
            # TODO Find a way to save these intermediate results
            #with open(MAIN_RW_PATH, 'rb') as f:
            #rw = pickle.load(f)
            # NOTE(review): the cache load is commented out, so this raise always
            # fires and the CSN is rebuilt from scratch every time
            raise FileNotFoundError
        except FileNotFoundError:
            # Build a CSN from the original Wepy HDF5 file. The CSN contains
            # member variables that will be used later, such as the transition
            # matrix and committors
            rw = MainRW(input_dir=dirname(hdf5_filenames[run]),
                        output_dir=MAIN_RW_DIR,
                        max_cycles=revo_max_cycles,
                        input_file=args.input_file,
                        input_path=hdf5_filenames[run])
            rw.get_csn(n_clusters=args.n_clusters)
            #with open(MAIN_RW_PATH, 'wb') as f:
            #pickle.dump(rw, f)
        # METHODS are the async. methods used to select the starting points for
        # additional sampling {d_eigvec, d_committor, max_flux}
        for method in METHODS:
            # TODO This results in the revo and random results being overwritten with each subsequent async. method
            METHOD_DIR = join(MAIN_RW_DIR, str(method))
            makedirs(METHOD_DIR, exist_ok=True)
            for async_max_cycles in ASYNC_MAX_CYCLES:
                # TODO Do the weights for new trajectories make sense if they run for more cycles than the revo simulation?
                # Skip invalid combinations where the async. run would outlast the revo run
                if async_max_cycles > revo_max_cycles:
                    continue
                print('Getting starting frames')
                ASYNC_CYCLES_PATH = join(METHOD_DIR, 'async_cycles.pkl')
                ASYNC_BEST_FRAMES_WEIGHTS_PATH = join(METHOD_DIR, 'best_rw_states_weights.txt')
                try:
                    #raise FileNotFoundError
                    with open(ASYNC_CYCLES_PATH, 'rb') as f:
                        best_frames, random_frames = pickle.load(f)
                    # Probe the weights file too, so a missing file forces recomputation below
                    np.loadtxt(ASYNC_BEST_FRAMES_WEIGHTS_PATH)
                except FileNotFoundError:
                    # Get the starting frames to be used for additional sampling. 'best_frames' are selected using the async. method, 'random_frames' are randomly chosen
                    best_frames, random_frames = rw.get_states(n_async_clusters=args.n_async_clusters,
                                                               eigvals=args.eigvals,
                                                               n_async_frames_per_cluster=args.n_async_frames_per_cluster,
                                                               method=method,
                                                               committors=args.committors,
                                                               min_committor=args.min_committor,
                                                               output_dir=METHOD_DIR)
                    with open(ASYNC_CYCLES_PATH, 'wb') as f:
                        pickle.dump((best_frames, random_frames), f)
                # Plot the weights of the selected frames as well as the expected weight for each position.
                # Each best_frames entry looks like ((run, walker, cycle), position); the
                # comprehension variable 'entry' does not leak into the outer 'entry' dict.
                pos = [entry[1] for entry in best_frames]
                async_starting_points_pos.append(pos)
                best_frames_weights = np.loadtxt(ASYNC_BEST_FRAMES_WEIGHTS_PATH)
                async_starting_points_weights.append(best_frames_weights)
                states = np.arange(len(EXP_FLUX))
                #weight_ratios = [warped_weights[i] / exp_flux[warp_pos_prev[i]] for i in range(len(warp_pos_prev))]
                plt.plot(states, EXP_FLUX, label='Expected Weight For Position')
                plt.plot(pos, best_frames_weights, marker='o', linestyle='None', label='Starting Positions')
                plt.yscale('log')
                plt.title(f'Async Starting Points - {method}, {revo_max_cycles} REVO Cycles')
                plt.xlabel('x')
                plt.ylabel('Weight')
                plt.legend()
                plt.savefig(join(METHOD_DIR, 'async_vs_expected_weights.png'))
                plt.clf()
                # Create a mapping between frame indices and the walker in the async. Wepy HDF5 file
                n_cycles = get_n_cycles(best_frames, revo_max_cycles)
                init_positions, best_frame_mapping = get_init_pos_and_frame_mapping(max(args.n_async_reps), best_frames)
                print('Running async. trajectories')
                ASYNC_CYCLES_DIR = join(METHOD_DIR, f'{async_max_cycles}_async_cycles')
                makedirs(ASYNC_CYCLES_DIR, exist_ok=True)
                # Run the async. trajectories (skipped if the HDF5 output already exists)
                ASYNC_RW_HDF5_PATH = join(ASYNC_CYCLES_DIR, 'rw_async_results.wepy.h5')
                #if True or not isfile(ASYNC_RW_HDF5_PATH):
                if not isfile(ASYNC_RW_HDF5_PATH):
                    print('Running async. trajectories to save in', ASYNC_RW_HDF5_PATH)
                    RWConventional.run(n_runs=1,
                                       n_cycles=n_cycles,
                                       n_walkers=args.n_async_clusters * args.n_async_frames_per_cluster * max(args.n_async_reps),
                                       n_dimension=1,
                                       boundary=args.n_states - 2,
                                       probability_file=args.probability_file,
                                       init_positions=init_positions,
                                       outputs_dir=ASYNC_CYCLES_DIR,
                                       output_filename='rw_async_results.wepy')
                # Repeat the setup for the randomly-selected control frames
                n_cycles = get_n_cycles(random_frames, revo_max_cycles)
                init_positions, random_frame_mapping = get_init_pos_and_frame_mapping(max(args.n_async_reps), random_frames)
                RANDOM_RW_HDF5_PATH = join(ASYNC_CYCLES_DIR, 'rw_random_results.wepy.h5')
                # Run the random trajectories
                #if True or not isfile(RANDOM_RW_HDF5_PATH):
                if not isfile(RANDOM_RW_HDF5_PATH):
                    print('Running random trajectories to save in', RANDOM_RW_HDF5_PATH)
                    RWConventional.run(n_runs=1,
                                       n_cycles=n_cycles,
                                       n_walkers=args.n_async_clusters * args.n_async_frames_per_cluster * max(args.n_async_reps),
                                       n_dimension=1,
                                       boundary=args.n_states - 2,
                                       probability_file=args.probability_file,
                                       init_positions=init_positions,
                                       outputs_dir=ASYNC_CYCLES_DIR,
                                       output_filename='rw_random_results.wepy')
                # Calculate rates for varying numbers of replicates
                print('Calculating rates')
                for n_rep in args.n_async_reps:
                    N_REP_DIR = join(ASYNC_CYCLES_DIR, f'{n_rep}_reps')
                    makedirs(N_REP_DIR, exist_ok=True)
                    ORIG_RATES_PATH = join(N_REP_DIR, f'orig_rates.npy')
                    ADJ_RATES_PATH = join(N_REP_DIR, f'adj_rates.npy')
                    RANDOM_RATES_PATH = join(N_REP_DIR, f'random_rates.npy')
                    try:
                        # Load cached rates when all three files exist
                        orig_rates = np.load(ORIG_RATES_PATH)
                        adj_rates = np.load(ADJ_RATES_PATH)
                        random_rates = np.load(RANDOM_RATES_PATH)
                    except FileNotFoundError:
                        # Calculate the rates
                        orig_rates, adj_rates, random_rates = CalcRates.run(args.output_dir,
                                                                            method,
                                                                            n_rep,
                                                                            max_cycles=revo_max_cycles,
                                                                            orig_max_cycles=revo_max_cycles,
                                                                            async_max_cycles=async_max_cycles,
                                                                            async_hdf5_path=ASYNC_RW_HDF5_PATH,
                                                                            random_hdf5_path=RANDOM_RW_HDF5_PATH,
                                                                            best_frame_mapping=best_frame_mapping,
                                                                            random_frame_mapping=random_frame_mapping,
                                                                            input_hdf5_path=hdf5_filenames[run],
                                                                            testing=args.testing)
                        # Truncate to the async. cycle threshold before caching
                        adj_rates = adj_rates[:async_max_cycles]
                        random_rates = random_rates[:async_max_cycles]
                        np.save(ORIG_RATES_PATH, orig_rates)
                        np.save(ADJ_RATES_PATH, adj_rates)
                        np.save(RANDOM_RATES_PATH, random_rates)
                    print(f'Run {run} REVO min: {np.min(orig_rates)}, max: {np.max(orig_rates)}')
                    # Accumulate running-average rates into the shared all_rates list
                    update_rate_in_list(all_rates, revo_max_cycles, async_max_cycles, n_rep, method='revo', rate=PlotRW.get_running_avg(orig_rates))
                    update_rate_in_list(all_rates, revo_max_cycles, async_max_cycles, n_rep, method=method, rate=PlotRW.get_running_avg(adj_rates))
                    update_rate_in_list(all_rates, revo_max_cycles, async_max_cycles, n_rep, method='random', rate=PlotRW.get_running_avg(random_rates))
                    plot = PlotRW(N_REP_DIR, committor_type='peaked', fig_index=run)
                    plot.running_avg_unbinding_rate(orig=orig_rates, best=adj_rates, random=random_rates, run=run)
                    plot.error_by_run(orig=orig_rates, best=adj_rates, random=random_rates, run=run)
                    print('Saved plot')
                    # Store final rates for plotting
                    # TODO Work with multiple numbers of replicates?
                    # Only the largest replicate count contributes to final_rates
                    if n_rep == max(args.n_async_reps):
                        running_avg = plot.get_running_avg(adj_rates)
                        final_rates[method][entry[method]] = running_avg[-1]
                        entry[method] += 1
                        # revo/random slots are bounded-checked because they are
                        # revisited once per async. method (see TODO above)
                        if entry['revo'] < final_rates['revo'].shape[0]:
                            running_avg = plot.get_running_avg(orig_rates)
                            final_rates['revo'][entry['revo'] % n_entries] = running_avg[-1]
                            entry['revo'] += 1
                        if entry['random'] < final_rates['random'].shape[0]:
                            running_avg = plot.get_running_avg(random_rates)
                            final_rates['random'][entry['random'] % n_entries] = running_avg[-1]
                            entry['random'] += 1
    return final_rates, async_starting_points_pos, async_starting_points_weights, all_rates
def get_boxplot_vals(n_runs, all_rates, method, plot_cycles_max=100, step_interval=5):
    """Collect per-cycle rate samples for a box-and-whisker plot.

    Samples the running-average rate for `method` every `step_interval` cycles
    across all runs and parameter-set instances, dropping zero entries, and
    derives x-axis labels plus log-scale y-axis bounds.

    Args:
        n_runs (unsigned int): Number of runs
        all_rates (list-like object): Per-run lists of Rates objects; the rate
            series is read from ``all_rates[run][i].rates[method]``
        method (str): The method that is being calculated, e.g. 'revo' or 'flux'
        plot_cycles_max (int, optional): Cycle threshold to plot. Defaults to 100.
        step_interval (int, optional): Interval of steps to plot. Defaults to 5.

    Returns:
        tuple: (vals, labels, y_range) where vals is a list of nonzero rate
        samples per plotted cycle, labels is a list of step-count strings for
        the x axis, and y_range is a (low, high) pair of powers of ten
        bracketing all plotted values
    """
    PLOT_CYCLES = np.arange(start=0, stop=plot_cycles_max, step=step_interval)
    # Store with shape (runs, instances, sampled cycles).
    # NOTE: sized by n_runs (previously len(hdf5_filenames)); with np.empty, any
    # rows beyond the filled range would have contained uninitialized garbage
    # that leaked into the plotted values.
    rates = np.empty((n_runs, len(all_rates[0]), len(PLOT_CYCLES)))
    for run in range(n_runs):
        for i in range(len(all_rates[0])):
            for col, cycle in enumerate(range(0, plot_cycles_max, step_interval)):
                rates[run, i, col] = all_rates[run][i].rates[method][cycle]
    vals = []
    labels = []
    y_min = 1
    y_max = 0
    for i in range(len(PLOT_CYCLES)):
        val = np.ravel(rates[:, :, i])
        # Zero entries mean "no event yet" and would distort the log-scale plot
        val = [element for element in val if element != 0]
        if len(val) > 0:
            y_min = np.min((y_min, np.min(val)))
            y_max = np.max((y_max, np.max(val)))
        vals.append(val)
        # Labels are total simulation steps (10 steps per cycle); async. methods
        # are offset by the steps already spent in the original simulation
        if method == 'revo':
            labels.append(str(10*PLOT_CYCLES[i]))
        else:
            labels.append(str(10*PLOT_CYCLES[i] + 10*plot_cycles_max))
    # Get y-axis bounds for log plot
    y_range = (y_min / 10, y_max * 10)
    y_range = (10**np.floor(np.log10(y_range[0])), 10**np.ceil(np.log10(y_range[1])))
    return vals, labels, y_range
if __name__ == '__main__':
    def get_inputs():
        """Parse command line arguments and derive dependent settings.

        Post-processing: populates ``args.eigvals`` from the begin/end pair,
        resolves file paths to absolute paths, and guarantees that
        ``args.n_async_reps`` is a list.

        Returns:
            argparse.Namespace: The fully post-processed arguments
        """
        parser = ArgumentParser()
        parser.add_argument('-i', dest='input_dir', default=None, help='Input directory')
        parser.add_argument('-o', dest='output_dir', help='Output directory')
        parser.add_argument('-c', dest='n_clusters', type=int, default=100, help='Number of clusters to use')
        parser.add_argument('-b', dest='eigv_begin', type=int, default=3, help='Beginning eigenvalue to use for scoring')
        parser.add_argument('-e', dest='eigv_end', type=int, default=6,
                            help='Ending eigenvalue to use for scoring (inclusive)')
        parser.add_argument('-a', dest='clustering_alg', default='MinibatchKMeans', help='Clustering alorithm to use')
        parser.add_argument('-m', dest='method', default='d_committors', help='Method used to select points for additional sampling, i.e. "d_committors", "flux" or "d_eigvec"')
        parser.add_argument('--n_async_clusters', dest='n_async_clusters', type=int, default=3, help='Number of async clusters to select for additional sampling')
        parser.add_argument('--n_async_frames_per_cluster', dest='n_async_frames_per_cluster', type=int, default=0, help='Number of frames to select from each cluster')
        # BUGFIX: default must be a list (was the scalar 1) -- downstream code
        # iterates n_async_reps and calls max() on it, and the old
        # list(args.n_async_reps) coercion raised TypeError on an int default
        parser.add_argument('--n_async_reps', dest='n_async_reps', nargs='+', type=int, default=[1], help='Number of replicates to run')
        parser.add_argument('--max_cycles', dest='max_cycles', type=int, default=None, help='Maximum cycle to consider for launching new trajectories, default uses all available cycles')
        parser.add_argument('--committors', dest='committors', default=None, help='Path to the committors file')
        parser.add_argument('--input_file', dest='input_file', default=None, help='Path to a file listing the paths to each WepyHDF5 file to use as inputs')
        parser.add_argument('--min_committor', dest='min_committor', type=float, default=0.001, help='Minimum committor value to use (values lower than this will be set to zero)')
        parser.add_argument('--eigvals', dest='eigvals', type=int)
        parser.add_argument('--states', dest='n_states', type=int, default=20, help='Number of states')
        parser.add_argument('--prob_file', dest='probability_file', help='File containing forward probability for each state')
        parser.add_argument('--testing', dest='testing', action='store_true', default=False)
        parser.add_argument('--debugging', dest='debugging', action='store_true', default=False)
        parser.add_argument('--new_hdf5_dir', dest='new_hdf5_dir', default=None, help='Input directory of new wepy HDF5 files')
        args = parser.parse_args()
        # A single eigenvalue stays scalar; a range becomes an inclusive array
        if args.eigv_begin == args.eigv_end:
            args.eigvals = args.eigv_begin
        else:
            args.eigvals = np.arange(args.eigv_begin, args.eigv_end + 1)
        args.probability_file = realpath(args.probability_file)
        if args.input_file is not None:
            args.input_file = realpath(args.input_file)
        if args.new_hdf5_dir is not None:
            args.new_hdf5_dir = realpath(args.new_hdf5_dir)
        # Defensive coercion in case a scalar slips through (e.g. programmatic use):
        # wrap it rather than calling list() on a non-iterable
        if not isinstance(args.n_async_reps, list):
            args.n_async_reps = [args.n_async_reps]
        return args
    print('Starting at', datetime.now())
    args = get_inputs()
    # TODO Use this as a flag to calculate all values from scratch?
    OVERWRITE = False
    # This is the expected value as calculated in rw_prob_peak.py
    EXP_FLUX_CONVERGED = 9.004059477104087e-10
    if args.input_file is None:
        # Using natural sorting so that filenames are listed in the correct order
        # based on their name, e.g. file 1, file 2, ..., file 10
        hdf5_filenames = natsorted(glob(join(args.input_dir, '**/*.h5'), recursive=True))
    else:
        # NOTE(review): file handle is never closed; consider a with-block
        hdf5_filenames = pickle.load(open(args.input_file, 'rb'))
    # Parameter sets: one revo cycle count and one async. cycle count (capped at 100)
    if args.debugging:
        # Debug runs only process the first two input files
        hdf5_filenames = hdf5_filenames[:2]
        REVO_MAX_CYCLES = (args.max_cycles,)
        ASYNC_MAX_CYCLES = (min(args.max_cycles, 100),)
        METHODS = ('flux', 'd_eigvec', 'd_committors')
    else:
        REVO_MAX_CYCLES = (args.max_cycles,)
        ASYNC_MAX_CYCLES = (min(args.max_cycles, 100),)
        METHODS = ('flux', 'd_eigvec', 'd_committors')
    # Fixed colors so each method is consistent across every figure
    plot_colors = {'revo':'tab:blue',
                   'random':'tab:brown',
                   'flux':'tab:orange',
                   'd_eigvec':'tab:green',
                   'd_committors':'tab:purple'}
    revo_cycles = []
    async_cycles = []
    ALL_RUNS_PATH = join(args.output_dir, 'runs')
    ALL_FINAL_RATES_PATH = join(args.output_dir, 'all_final_rates.pkl')
    ALL_RATES_PATH = join(args.output_dir, 'all_rates.pkl')
    ASYNC_POINTS = join(args.output_dir, 'async_starting_points.npz')
    try:
        #raise FileNotFoundError
        with open(ALL_FINAL_RATES_PATH, 'rb') as f:
            all_final_rates = pickle.load(f)
        with open(ALL_RATES_PATH, 'rb') as f:
            all_rates = pickle.load(f)
        # Attempt to find the results for the given parameter set. If they're available load them
        # from the pickle files, otherwise begin calculating them
        found = False
        for rates in all_rates[0]:
            if rates.orig_cycles == REVO_MAX_CYCLES[0] and rates.async_cycles == ASYNC_MAX_CYCLES[0] and rates.n_reps == args.n_async_reps[0]:
                found = True
                break
        if not found:
            # Reuse the cache-miss path below to recompute everything
            raise FileNotFoundError
        async_points = np.load(ASYNC_POINTS)
        async_pos = async_points['async_pos']
        async_weights = async_points['async_weights']
    except FileNotFoundError:
        # If debugging, process runs iteratively to make debugging easier. Otherwise process runs in parallel
        if args.debugging:
            results = [process_run(i) for i in range(len(hdf5_filenames))]
        else:
            N_JOBS = min(50, len(hdf5_filenames))
            results = Parallel(n_jobs=N_JOBS, backend='multiprocessing')(delayed(process_run)(i) for i in range(len(hdf5_filenames)))
        all_final_rates = []
        all_rates = []
        async_pos = []
        async_weights = []
        # The results are stored as a list in order of run, e.g. [[run 0 results], [run 1 results], [run N results]...]
        # This will store the results by type in order of run
        for i in range(len(results)):
            all_final_rates.append(results[i][0])
            async_pos.append(results[i][1])
            async_weights.append(results[i][2])
            all_rates.append(results[i][3])
        # Cache the aggregated results for future invocations
        with open(ALL_FINAL_RATES_PATH, 'wb') as f:
            pickle.dump(all_final_rates, f)
        with open(ALL_RATES_PATH, 'wb') as f:
            pickle.dump(all_rates, f)
        np.savez(ASYNC_POINTS, async_pos=async_pos, async_weights=async_weights)
# Plot the unbinding rate every 10 cycles as a box-and-whisker plot
interval = ASYNC_MAX_CYCLES[0] // 10
#revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=100, step_interval=5)
#revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=min(args.max_cycles, 100), step_interval=5)
revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
plt.boxplot(revo_vals, labels=revo_labels, showmeans=True)
plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
plt.grid(axis='y')
plt.legend(loc='lower right')
plt.xticks(rotation=45)
plt.yscale('log')
plt.ylim(revo_y_range)
plt.title(f'REVO 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
plt.xlabel('Steps')
plt.ylabel('Unbinding Rate')
plt.tight_layout()
plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_revo_every_10_cycles.png'))
plt.clf()
#flux_vals, flux_labels, flux_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='flux', plot_cycles_max=min(args.max_cycles, 100), step_interval=5)
flux_vals, flux_labels, flux_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='flux', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
plt.boxplot(flux_vals, labels=flux_labels, showmeans=True)
plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
plt.grid(axis='y')
plt.legend(loc='lower right')
plt.xticks(rotation=45)
plt.yscale('log')
plt.ylim(flux_y_range)
plt.title(f'REVO + Max. Flux 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
plt.xlabel('Steps')
plt.ylabel('Unbinding Rate')
plt.tight_layout()
plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_flux_every_10_cycles.png'))
plt.clf()
d_eigvec_vals, d_eigvec_labels, d_eigvec_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='d_eigvec', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
plt.boxplot(d_eigvec_vals, labels=d_eigvec_labels, showmeans=True)
plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
plt.grid(axis='y')
plt.legend(loc='lower right')
plt.xticks(rotation=45)
plt.yscale('log')
plt.ylim(d_eigvec_y_range)
plt.title(f'REVO + d_Eigvec 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
plt.xlabel('Steps')
plt.ylabel('Unbinding Rate')
plt.tight_layout()
plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_d_eigvec_every_10_cycles.png'))
plt.clf()
random_vals, random_labels, random_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='random', plot_cycles_max=min(args.max_cycles, 100), step_interval=interval)
plt.boxplot(random_vals, labels=random_labels, showmeans=True)
plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
plt.grid(axis='y')
plt.legend(loc='lower right')
plt.xticks(rotation=45)
plt.yscale('log')
plt.ylim(random_y_range)
plt.title(f'REVO + Random 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
plt.xlabel('Steps')
plt.ylabel('Unbinding Rate')
plt.tight_layout()
plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_random_every_10_cycles.png'))
plt.clf()
#revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=args.max_cycles, step_interval=500)
revo_vals, revo_labels, revo_y_range = get_boxplot_vals(n_runs=len(hdf5_filenames), all_rates=all_rates, method='revo', plot_cycles_max=args.max_cycles, step_interval=interval)
revo_mean = [np.mean(revo_vals[i]) for i in range(len(revo_vals))]
plt.boxplot(revo_vals, labels=revo_labels, showmeans=True)
#plt.plot(revo_mean, labels=revo_labels, markers='_', color='red')
#plt.plot(revo_mean, labels=revo_labels)
plt.axhline(EXP_FLUX_CONVERGED, linestyle='--', label='Expected value at steady state')
plt.legend(loc='lower right')
plt.xticks(rotation=45)
plt.yscale('log')
plt.ylim(revo_y_range)
plt.title(f'REVO 1D Random Walk Unbinding Rates ({len(hdf5_filenames)} Runs)')
plt.xlabel('Steps')
plt.ylabel('Unbinding Rate')
plt.tight_layout()
#plt.savefig(join(args.output_dir, 'revo_every_20_cycles.png'))
plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_revo_every_20_cycles.png'))
plt.clf()
plt.plot(np.arange(len(EXP_FLUX)), EXP_FLUX, label='Expected Weight For Position')
plt.plot(np.ravel(async_pos), np.ravel(async_weights), marker='o', linestyle='None', label='Starting Positions')
plt.yscale('log')
plt.title('Async Starting Points')
plt.xlabel('x')
plt.ylabel('Weight')
plt.legend()
#plt.savefig(join(args.output_dir, 'async_vs_expected_weights.png'))
plt.savefig(join(args.output_dir, f'{REVO_MAX_CYCLES[0]}_revo_{ASYNC_MAX_CYCLES[0]}_async_vs_expected_weights.png'))
plt.clf()
    N_RUNS = len(hdf5_filenames)
    # Calculate average unbinding rates for all methods and parameters
    # (element-wise sum over runs, then divide by the run count)
    avg_final_rates = deepcopy(all_final_rates[0])
    for run in range(1, N_RUNS):
        for method in avg_final_rates:
            avg_final_rates[method] += all_final_rates[run][method]
    denom = np.full_like(avg_final_rates['revo'], N_RUNS)
    for method in avg_final_rates:
        avg_final_rates[method] /= denom
    final_rates = avg_final_rates
    plot = PlotRW(args.output_dir, committor_type='peaked', fig_index=N_RUNS)
    max_rate = plot.exp_flux_converged
    min_rate = plot.exp_flux_converged
    # Set all values of 0 to 'None' for plotting (NaN is skipped by matplotlib),
    # and track the overall min/max to size the log-scale y axis
    for method in final_rates.keys():
        for i in range(len(final_rates[method])):
            if final_rates[method][i] == 0:
                final_rates[method][i] = np.nan
        max_rate = max(max_rate, np.nanmax(final_rates[method]))
        min_rate = min(min_rate, np.nanmin(final_rates[method]))
    plt.axhline(plot.exp_flux_converged, linestyle='--', label='Expected value at steady state')
    plt.legend()
    plt.xscale('log')
    # Round the y limits outward to powers of ten
    y_min = 10**np.floor(np.log10(min_rate))
    y_max = 10**np.ceil(np.log10(max_rate))
    plt.ylim([y_min, y_max])
    plt.yscale('log')
    plt.xlabel('Total Steps (REVO + Async. Method)')
    plt.ylabel('Avg. Unbinding Rate by Run (50 Runs)')
    FINAL_RATES_PATH = join(args.output_dir, 'final_rates.png')
    plt.savefig(FINAL_RATES_PATH)
    # Count the valid parameter combinations (strictly longer revo than async.)
    #N_SAMPLES = len(REVO_MAX_CYCLES) * len(ASYNC_MAX_CYCLES)
    N_SAMPLES = 0
    for revo_max_cycles in REVO_MAX_CYCLES:
        for async_max_cycles in ASYNC_MAX_CYCLES:
            if revo_max_cycles > async_max_cycles:
                N_SAMPLES += 1
    N_REVO_WALKERS = 48
    # TODO Why is args.n_async_reps a list?
    N_ASYNC_WALKERS = args.n_async_clusters * args.n_async_frames_per_cluster * args.n_async_reps[0]
    for method in final_rates.keys():
        # REVO simulations are averaged for a given parameter set and plotted once
        if method == 'revo':
            label = method
            # x axis is total simulated steps: cycles * 10 steps/cycle * walkers
            x_axis = [cycles * 10 * N_REVO_WALKERS for cycles in REVO_MAX_CYCLES]
            revo_avg = np.zeros(len(REVO_MAX_CYCLES))
            counts = np.zeros(len(REVO_MAX_CYCLES))
            n_entry = 0
            for i, revo_max_cycles in enumerate(REVO_MAX_CYCLES):
                for j, async_max_cycles in enumerate(ASYNC_MAX_CYCLES):
                    if async_max_cycles <= revo_max_cycles:
                        revo_avg[i] += final_rates[method][n_entry]
                        counts[i] += 1
                        n_entry += 1
            revo_avg /= counts
            # Only label each method once in the shared legend
            if method in plt.gca().get_legend_handles_labels()[1]:
                label = ''
            else:
                label = method
            #plt.plot(x_axis, revo_avg, color=plot_colors[method], label=label)
            plt.plot(x_axis, revo_avg, color=plot_colors[method], label=label)
            # NOTE(review): 'legend' is assigned only in the else branch below; this
            # works because the dict iterates the async. methods before 'revo', but
            # the ordering dependency is fragile
            plt.savefig(FINAL_RATES_PATH, bbox_extra_artists=(legend,), bbox_inches='tight')
        else:
            n_entry = 0
            for i, revo_max_cycles in enumerate(REVO_MAX_CYCLES):
                vals = []
                x_axis = []
                for j, async_max_cycles in enumerate(ASYNC_MAX_CYCLES):
                    if async_max_cycles <= revo_max_cycles:
                        #label = f'{method} ({revo_max_cycles*10} revo steps)'
                        if method in plt.gca().get_legend_handles_labels()[1]:
                            label = ''
                        else:
                            label = method
                        # Total steps include both the original and async. cycles
                        x_axis.append((revo_max_cycles + async_max_cycles) * 10 * N_ASYNC_WALKERS)
                        vals.append(final_rates[method][n_entry])
                        n_entry += 1
                # A single point needs an explicit marker to be visible
                if len(vals) == 1:
                    plt.plot(x_axis, vals, marker='o', color=plot_colors[method], label=label)
                else:
                    plt.plot(x_axis, vals, color=plot_colors[method], label=label)
                legend = plt.legend(bbox_to_anchor=(1.1, 1.1))
                plt.savefig(FINAL_RATES_PATH, bbox_extra_artists=(legend,), bbox_inches='tight')
    #plt.legend()
    #plt.savefig(FINAL_RATES_PATH)
    # NOTE(review): dead code block kept for reference; never executes
    if False:
        plt.axhline(plot.exp_flux_converged, linestyle='--', label='Expected value at steady state')
        plt.legend()
        #plt.xscale('log')
        y_min = 10**np.floor(np.log10(min_rate))
        y_max = 10**np.ceil(np.log10(max_rate))
        plt.ylim([y_min, y_max])
        plt.yscale('log')
        plt.xlabel('Total Steps (REVO + Async. Method)')
        plt.ylabel('Avg. Unbinding Rate by Run (50 Runs)')
        FINAL_RATES_PATH = join(args.output_dir, 'final_rates.png')
        #plt.savefig(FINAL_RATES_PATH)
        plt.savefig(FINAL_RATES_PATH, bbox_extra_artists=(legend,), bbox_inches='tight')
    plt.clf()
    print('Run finished')