import os
from os.path import join
from matplotlib import pyplot as plt
import mdtraj as mdj
import numpy as np
import pickle
import torch
import torch.nn as nn
from deeptime.util.data import TrajectoriesDataset
from deeptime.decomposition.deep import VAMPNet, koopman_matrix
from deeptime.clustering import MiniBatchKMeans
from deeptime.markov.msm import BayesianMSM
from deeptime.markov.tools.analysis import committor
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm # progress bar
from IPython.display import Image
import nglview
from wepy.hdf5 import WepyHDF5
from wepy.boundary_conditions.unbinding import UnbindingBC
from wepy.analysis.contig_tree import ContigTree
from wepy.resampling.decisions.clone_merge import MultiCloneMergeDecision
from wepy.util.util import traj_box_vectors_to_lengths_angles
from geomm.rmsd import calc_rmsd
from geomm.grouping import group_pair
from geomm.superimpose import superimpose
from geomm.centering import center_around
def open_wepy(wepy_h5):
    """Open a WepyHDF5 handle if it is not already open.

    Returns True when this call performed the open (the caller then owns
    closing it); False when `open()` raised IOError, i.e. the handle was
    already open.
    """
    try:
        wepy_h5.open()
    except IOError:
        return False
    return True
def get_observables(wepy_h5):
    """Return the observable field names stored in the wepy HDF5 file.

    Opens the file if needed and closes it again only when this call was
    the one that opened it. Returns an empty list when the observables
    group is missing or malformed.
    """
    we_opened_it = open_wepy(wepy_h5)
    try:
        names = wepy_h5.observable_field_names
    except (KeyError, TypeError):
        names = []
    if we_opened_it:
        wepy_h5.close()
    return names
def compute_aligned_images(fields, *args):
    """Observable function: re-image and align each frame of one trajectory.

    Parameters (wepy compute_observable convention):
        fields: dict with 'positions' (n_frames, n_atoms, 3) and
            'box_vectors' (n_frames, 3, 3).
        args[0]: system index selecting entries from the module globals
            A_IDXS, B_IDXS, LIGAND_IDXS, AB_IDXS, AB_LIGAND_IDXS, PDBS.

    Returns the aligned positions with the same shape as 'positions'.
    """
    sys_idx = args[0]
    aligned_pos = np.asarray(fields['positions'])
    # Box edge lengths taken from the diagonal of each frame's box-vector
    # matrix (assumes an orthorhombic box -- TODO confirm).
    bvs = np.diagonal(fields['box_vectors'], axis1=1, axis2=2)
    for i in range(aligned_pos.shape[0]):
        # Group CLR and RAMP
        aligned_pos[i] = group_pair(aligned_pos[i], bvs[i], A_IDXS[sys_idx], B_IDXS[sys_idx])
        # Then group peptide to CLR/RAMP complex
        aligned_pos[i] = group_pair(aligned_pos[i], bvs[i], LIGAND_IDXS[sys_idx], AB_IDXS[sys_idx])
        aligned_pos[i] = center_around(aligned_pos[i], AB_LIGAND_IDXS[sys_idx])
        # Superimpose the CLR/RAMP/peptide complex onto the initial pose using only the CLR and RAMP atomic positions for alignment
        aligned_pos[i], _, _ = superimpose(PDBS[sys_idx].xyz[0], aligned_pos[i], idxs=AB_IDXS[sys_idx])
    return aligned_pos
def load_traj_field(wepy_h5, field_name):
    """Read `field_name` from every trajectory of run 0.

    Returns a list with one NumPy array per trajectory (trajectories may
    differ in length, so they are not stacked into a single array).
    """
    we_opened_it = open_wepy(wepy_h5)
    n_trajs = wepy_h5.num_run_trajs(0)
    n_cycles = wepy_h5.num_run_cycles(0)  # queried as in the original; unused below
    values = []
    for traj_idx in range(n_trajs):
        dataset = wepy_h5.h5[f'runs/{0}/trajectories/{traj_idx}/{field_name}']
        values.append(np.asarray(dataset))
    if we_opened_it:
        wepy_h5.close()
    return values
def align_pdb(sys, mdj_traj):
    """Re-image the first frame of an MDTraj trajectory for system `sys`.

    Groups CLR with RAMP, then the peptide with the CLR/RAMP complex, and
    finally centers the whole complex in the box.

    Returns the re-imaged (n_atoms, 3) positions of frame 0.
    """
    pos = mdj_traj.xyz[0]
    bvs = mdj_traj.unitcell_lengths[0]
    # Group CLR and RAMP
    pos = group_pair(pos, bvs, A_IDXS[sys], B_IDXS[sys])
    # Then group peptide to CLR/RAMP complex
    pos = group_pair(pos, bvs, LIGAND_IDXS[sys], AB_IDXS[sys])
    # BUG FIX: index the per-system atom list. The original passed the whole
    # list of index arrays (AB_LIGAND_IDXS), unlike compute_aligned_images
    # which correctly uses AB_LIGAND_IDXS[sys_idx].
    pos = center_around(pos, AB_LIGAND_IDXS[sys])
    return pos
def compute_ligand_backbone_dihedrals(fields, *args):
    """Observable function: sin/cos-encoded peptide backbone dihedrals.

    args[0] is an MDTraj topology, args[1] the atom quartets defining each
    dihedral. Returns an array of shape (n_frames, 2 * n_dihedrals) with
    sin/cos pairs interleaved per angle.
    """
    topology, ligand_quartlets = args[0], args[1]
    unitcell_lengths, unitcell_angles = traj_box_vectors_to_lengths_angles(fields['box_vectors'])
    traj = mdj.Trajectory(fields['observables/aligned_images'],
                          topology,
                          unitcell_lengths=unitcell_lengths,
                          unitcell_angles=unitcell_angles)
    dihedrals = mdj.compute_dihedrals(traj, indices=ligand_quartlets)
    # Encode each angle as a (sin, cos) pair so the periodic wrap at +/- pi
    # does not create artificial discontinuities in feature space.
    n_frames, n_angles = dihedrals.shape
    encoded = np.empty((n_frames, 2 * n_angles))
    encoded[:, 0::2] = np.sin(dihedrals)
    encoded[:, 1::2] = np.cos(dihedrals)
    return encoded
def compute_residue_rmsds(fields, *args):
    """Compute per-residue backbone RMSDs against the initial conformation.

    args[0]: list of atom-index arrays, one per residue.
    args[1]: list of reference positions for those residues (initial frame).

    Returns an array of shape (n_frames, n_residues).
    """
    residues_indices, init_residue_pos = args[0], args[1]
    aligned_images = np.asarray(fields['observables/aligned_images'])
    n_frames = aligned_images.shape[0]
    n_residues = len(residues_indices)
    rmsds = np.empty((n_frames, n_residues))
    for frame in range(n_frames):
        for res in range(n_residues):
            rmsds[frame, res] = calc_rmsd(init_residue_pos[res],
                                          aligned_images[frame, residues_indices[res]])
    return rmsds
def normalize_features(features):
    """Standardize each feature to zero mean and unit variance.

    `features` is array-like of shape (n_trajs, n_cycles, n_features);
    statistics are taken jointly over all trajectories and cycles.

    Returns a new float array. Fixes two defects in the original:
    np.asarray() does not copy an existing ndarray, so the in-place
    `-=`/`/=` silently mutated the caller's data, and integer inputs
    raised on in-place float division. A constant feature (std == 0)
    still divides by zero, matching the original behavior.
    """
    features = np.array(features, dtype=np.float64)  # explicit copy, float math
    mean = features.mean(axis=(0, 1))
    std = features.std(axis=(0, 1))
    return (features - mean) / std
def get_singular_values(vampnet, loader):
    """Estimate the Koopman singular values of a trained VAMPNet.

    The Koopman matrix is computed per batch from the lobe outputs; its
    diagonal is averaged over batches and returned sorted in descending
    order.
    """
    singular_values = []
    for batch_0, batch_t in loader:
        x_0 = vampnet.lobe(batch_0.to(device=vampnet.device))
        x_t = vampnet.lobe_timelagged(batch_t.to(device=vampnet.device))
        koopman_batch = koopman_matrix(x_0, x_t, epsilon=vampnet.epsilon, mode=vampnet.score_mode)
        # BUG FIX: .data.numpy() raises for CUDA tensors; detach and move to
        # host explicitly so this works on both CPU and GPU devices.
        singular_values.append(np.diagonal(koopman_batch.detach().cpu().numpy()))
    avg_sv = np.average(singular_values, axis=0)
    # Descending order
    return np.sort(avg_sv)[::-1]
def get_selection(idxs):
    """Build an NGLView '@'-style atom selection string from atom indices.

    Returns '' for an empty index list (the original stripped the trailing
    character from a bare '@', yielding the empty string).
    """
    body = ','.join(f'{idx}' for idx in idxs)
    return f'@{body}' if body else ''
def load_inputs(hdf5_path):
    """Load (or compute and cache) all VAMPNet input features for one run.

    For each feature set (ligand backbone dihedrals, binding-site residue
    RMSDs, peptide min-distance, native-contact distances, walker weights)
    the lookup order is: pickle cache in `hdf5_dir`, then the wepy HDF5
    observables, then recompute and save back to the HDF5 file.

    NOTE(review): depends on module-level globals (hdf5_dir, sys, PDBS,
    ligand_quartlets, residues_indices, init_residue_pos) -- they must be
    set before calling.

    Returns:
        (sw, dihedral_trajs, rmsds_trajs, min_dists, min_dists_trajs,
         native_dists_trajs, weights)
    """
    try:
        we = WepyHDF5(hdf5_path,'r+')
    except (IOError, AttributeError, OSError) as e:
        # Sometimes if the Python script exits prematurely, it does not reset the HDF5
        # file's flag. This manually resets it, allowing the file to be opened.
        os.system(f'h5clear -s {hdf5_path}')
        we = WepyHDF5(hdf5_path,'r+')
    with we:
        obs = get_observables(we)
        contig_tree = ContigTree(wepy_h5=we,
                                 decision_class=MultiCloneMergeDecision,
                                 boundary_condition_class=UnbindingBC)
        # Use the sliding window method to load all transitions between frames
        # Note: The parameter '2' indicates that the lag time used is 1
        sw = contig_tree.sliding_windows(2)
        if 'aligned_images' not in obs:
            print('Computing aligned_images and saving to HDF5 file')
            we.compute_observable(compute_aligned_images,
                                  ['positions','box_vectors'],
                                  (sys,),
                                  save_to_hdf5='aligned_images')
        # Load the normalized dihedral features
        DIHEDRALS_PATH = join(hdf5_dir, 'dihedral_trajs.pkl')
        # First try to load the pickled precomputed values
        try:
            with open(DIHEDRALS_PATH, 'rb') as f:
                dihedral_trajs = pickle.load(f)
        except FileNotFoundError:
            # If the pickle file isn't found, try to load them from the HDF5 file
            if 'ligand_backbone_dihedrals' in obs:
                print('Loading ligand_backbone_dihedrals from HDF5 file')
                dihedrals = load_traj_field(we, 'observables/ligand_backbone_dihedrals')
            else:
                # If the features haven't been computed, calculate them and save to the HDF5 file
                print('Computing ligand_backbone_dihedrals and saving to HDF5 file')
                dihedrals = we.compute_observable(compute_ligand_backbone_dihedrals,
                                                  ['observables/aligned_images','box_vectors'],
                                                  (PDBS[sys].topology, ligand_quartlets),
                                                  save_to_hdf5='ligand_backbone_dihedrals')
            dihedrals = normalize_features(dihedrals)
            # Transform the dihedrals from features of a frame to features of a trajectory
            # Each sliding-window pair (s, f) is a (run, walker, cycle) trace;
            # s[1:]/f[1:] drop the run index, leaving a (walker, cycle) index.
            dihedral_trajs = [np.array((dihedrals[s[1:]], dihedrals[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(DIHEDRALS_PATH, 'wb') as f:
                pickle.dump(dihedral_trajs, f)
        # Load the normalized residue RMSD features
        # These are for residues near the binding site that appear to interact with the peptide,
        # the RMSD is calculated between each frame and the initial, bound conformation
        RMSDS_PATH = join(hdf5_dir, 'rmsds_trajs.pkl')
        try:
            with open(RMSDS_PATH, 'rb') as f:
                rmsds_trajs = pickle.load(f)
        except FileNotFoundError:
            if 'residue_rmsds' in obs:
                rmsds = load_traj_field(we, 'observables/residue_rmsds')
            else:
                print('Computing residue_rmsds and saving to HDF5 file')
                rmsds = we.compute_observable(compute_residue_rmsds,
                                              ['observables/aligned_images'],
                                              (residues_indices, init_residue_pos),
                                              save_to_hdf5='residue_rmsds')
            rmsds = normalize_features(rmsds)
            rmsds_trajs = [np.array((rmsds[s[1:]], rmsds[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(RMSDS_PATH, 'wb') as f:
                pickle.dump(rmsds_trajs, f)
        # Minimum peptide-to-complex distance feature
        MIN_DIST_PATH = join(hdf5_dir, 'min_dists.pkl')
        MIN_DIST_TRAJ_PATH = join(hdf5_dir, 'min_dist_trajs.pkl')
        try:
            with open(MIN_DIST_PATH, 'rb') as f:
                min_dists = pickle.load(f)
            with open(MIN_DIST_TRAJ_PATH, 'rb') as f:
                min_dists_trajs = pickle.load(f)
        except FileNotFoundError:
            assert 'min-dist' in obs, "Missing 'min-dist' observable in wepy HDF5 file"
            min_dists = load_traj_field(we, 'observables/min-dist')
            # min-dist is scalar per frame; add a trailing feature axis so it
            # matches the (traj, cycle, feature) layout normalize_features expects
            min_dists = np.expand_dims(min_dists, axis=-1)
            min_dists = normalize_features(min_dists)
            with open(MIN_DIST_PATH, 'wb') as f:
                pickle.dump(min_dists, f)
            min_dists_trajs = [np.array((min_dists[s[1:]], min_dists[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(MIN_DIST_TRAJ_PATH, 'wb') as f:
                pickle.dump(min_dists_trajs, f)
        # Native-contact distance features
        NATIVE_DISTS_PATH = join(hdf5_dir, 'native_dists_trajs.pkl')
        try:
            with open(NATIVE_DISTS_PATH, 'rb') as f:
                native_dists_trajs = pickle.load(f)
        except FileNotFoundError:
            assert 'native-dists' in obs, "Missing 'native-dists' observable in wepy HDF5 file"
            native_dists = load_traj_field(we, 'observables/native-dists')
            native_dists = normalize_features(native_dists)
            native_dists_trajs = [np.array((native_dists[s[1:]], native_dists[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(NATIVE_DISTS_PATH, 'wb') as f:
                pickle.dump(native_dists_trajs, f)
        # Per-walker, per-cycle statistical weights from the WE resampling
        print('Loading weights')
        WEIGHTS_PATH = join(hdf5_dir, 'weights.pkl')
        try:
            print('Loading weights from pickle file')
            with open(WEIGHTS_PATH, 'rb') as f:
                weights = pickle.load(f)
        except FileNotFoundError:
            print('Loading weights from HDF5 file')
            weights = load_traj_field(we, 'weights')
            with open(WEIGHTS_PATH, 'wb') as f:
                pickle.dump(weights, f)
        print('Loaded weights')
    return sw, dihedral_trajs, rmsds_trajs, min_dists, min_dists_trajs, native_dists_trajs, weights
# Filesystem layout: per-system REVO simulation output (HDF5) and the
# CHARMM-GUI build PDBs used as the reference (initial/bound) structures.
base_path = '/dickson/s2/jim/repos/clr/alexrd/home'
base_path2 = '/dickson/s2/jim/repos/bmb-961-machine-learning-for-molecular-dynamics/Project/data'
sys_folders = ['sys1','sys2_AMwt_CLR_R2','sys3_AM2','sys4_CGRP_4mut_CLR_R1','sys5_AMmut']
cg_folders = ['charmm-gui-8162111267','charmm-gui-8163055957','charmm-gui-8169421429','charmm-gui-8162396385','charmm-gui-8169578268']
HDF5_BASE_PATHS = [join(base_path2,sys_folders[i],'REVO') for i in range(5)]
PDB_PATHS = [join(base_path,sys_folders[i],cg_folders[i],'openmm/step3_charmm2omm.pdb') for i in range(5)]
print(PDB_PATHS)
PDBS = [mdj.load_pdb(p) for p in PDB_PATHS]
# Per-system atom index groups; judging from the colored views below,
# PROA is RAMP, PROB is CLR and PROC is the peptide -- TODO confirm.
A_IDXS = [p.top.select('segname PROA') for p in PDBS]
B_IDXS = [p.top.select('segname PROB') for p in PDBS]
LIGAND_IDXS = [p.top.select('segname PROC') for p in PDBS]
AB_IDXS = [p.top.select('segname PROA PROB') for p in PDBS]
AB_LIGAND_IDXS = [p.top.select('segname PROA PROB PROC') for p in PDBS]
# Binding-site residue ids used for the per-residue RMSD features
res_ids = (2072, 2092, 2094, 2119, 2121)
# NGLView '@index' selection strings for the display cells below
RAMP_SELECTIONS = [get_selection(idxs) for idxs in A_IDXS ]
CLR_SELECTIONS = [get_selection(idxs) for idxs in B_IDXS ]
LIGAND_SELECTIONS = [get_selection(idxs) for idxs in LIGAND_IDXS ]
['/dickson/s2/jim/repos/clr/alexrd/home/sys1/charmm-gui-8162111267/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys2_AMwt_CLR_R2/charmm-gui-8163055957/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys3_AM2/charmm-gui-8169421429/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys4_CGRP_4mut_CLR_R1/charmm-gui-8162396385/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys5_AMmut/charmm-gui-8169578268/openmm/step3_charmm2omm.pdb']
# Display the initial (bound) pose of system 0 ('CGRP')
sys = 0
initial_pose = nglview.show_mdtraj(PDBS[sys], default_representation=False)
initial_pose.add_cartoon(RAMP_SELECTIONS[sys], color='purple')   # RAMP
initial_pose.add_cartoon(CLR_SELECTIONS[sys], color='orange')    # CLR
initial_pose.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red')  # peptide
initial_pose.center()
print('CGRP')
initial_pose
CGRP
NGLWidget()
# Display the initial (bound) pose of system 3 ('ss-CGRP')
sys = 3
initial_pose = nglview.show_mdtraj(PDBS[sys], default_representation=False)
initial_pose.add_cartoon(RAMP_SELECTIONS[sys], color='purple')   # RAMP
initial_pose.add_cartoon(CLR_SELECTIONS[sys], color='orange')    # CLR
initial_pose.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red')  # peptide
initial_pose.center()
print('ss-CGRP')
initial_pose
ss-CGRP
NGLWidget()
# Prefer the GPU when available; otherwise pin the CPU thread count.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # Let cuDNN benchmark and pick the fastest kernels for the fixed input sizes
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device("cpu")
    torch.set_num_threads(12)
print(f"Using device {device}")
Using device cpu
# Systems are 0:CGRP and 3:ss-CGRP
#sys = 0
sys = 3
#rep = 0
rep = 2
# output_states=None triggers the model-selection scan below;
# a fixed integer skips it
#output_states = 7
output_states = None
# Sliding windows of 4 consecutive backbone atoms define the peptide dihedrals
ligand_backbone_idxs = PDBS[sys].top.select('segname PROC and backbone')
ligand_quartlets = [ligand_backbone_idxs[i:i+4] for i in range(len(ligand_backbone_idxs) - 4 + 1)]
# Note: MDTraj's 'residue' is equivalent to VMD's 'resid'
residues_indices = [PDBS[sys].top.select(f'residue {res_id} and backbone') for res_id in res_ids]
# Reference (frame 0) positions of each binding-site residue's backbone
init_residue_pos = [PDBS[sys].xyz[0][residue_indices] for residue_indices in residues_indices]
hdf5_dir = join(HDF5_BASE_PATHS[sys],f'tail{rep}_0')
hdf5_path = join(HDF5_BASE_PATHS[sys],f'tail{rep}_0',f'wepy.resultstail{rep}_0.h5')
print("Loading",hdf5_path,"...")
sw, dihedral_trajs, rmsds_trajs, min_dists, min_dists_trajs, native_dists_trajs, weights = load_inputs(hdf5_path)
# Concatenate all feature sets into one vector per frame; each entry i holds
# the (t, t+lag) feature pair for one sliding-window transition
N_FEATURES = len(dihedral_trajs[0][0]) + len(rmsds_trajs[0][0]) + len(min_dists_trajs[0][0]) + len(native_dists_trajs[0][0])
features_arr = np.empty((len(dihedral_trajs), 2, N_FEATURES), dtype=np.float32)
for i in range(features_arr.shape[0]):
    for j in range(2):
        features_arr[i,j] = np.concatenate((dihedral_trajs[i][j], rmsds_trajs[i][j], min_dists_trajs[i][j], native_dists_trajs[i][j]), dtype=np.float32)
# DeepTime's TrajectoriesDataset expects a list of NumPy arrays as the collection of trajectories
features = [val for val in features_arr]
Loading /dickson/s2/jim/repos/bmb-961-machine-learning-for-molecular-dynamics/Project/data/sys4_CGRP_4mut_CLR_R1/REVO/tail2_0/wepy.resultstail2_0.h5 ... Loading weights Loading weights from pickle file Loaded weights
# Build the lagged dataset and randomly hold out 30% of transitions for validation
dataset = TrajectoriesDataset.from_numpy(lagtime=1, data=features)
n_val = int(len(dataset)*.3)
train_data, val_data = torch.utils.data.random_split(dataset, [len(dataset)-n_val, n_val])
pass
# Model selection: scan the number of VAMPNet output states, training
# n_runs independent models per setting and recording validation VAMP
# scores and estimated Koopman singular values.
if output_states is None:
    n_runs = 3 #5
    all_output_states = np.arange(2, 15+1)
    #all_output_states = np.arange(2, 4+1)
    vamp_scores = []
    all_sv_len = all_output_states[-1] - all_output_states[0] + 1
    # all_sv[j, state_i, run] = j-th singular value for the state_i-th
    # output-state setting; unfilled entries stay NaN
    all_sv = np.full((np.max(all_output_states), all_sv_len, n_runs), np.nan)
    all_sv[0] = 1
    # NOTE(review): this loop rebinds the global `output_states`; after the
    # scan it is left at the last scanned value, not None.
    for state_i, output_states in enumerate(all_output_states):
        run_scores = []
        for run in range(n_runs):
            lobe = nn.Sequential(
                nn.Linear(N_FEATURES, 20), nn.ELU(),
                nn.Linear(20, output_states),
                nn.Softmax(dim=1) # obtain fuzzy probability distribution over output states
            )
            lobe = lobe.to(device=device)
            #print(lobe)
            loader_train = DataLoader(train_data, batch_size=10000, shuffle=True)
            loader_val = DataLoader(val_data, batch_size=len(val_data), shuffle=False)
            vampnet = VAMPNet(lobe=lobe, learning_rate=5e-3, device=device)
            model = vampnet.fit(loader_train, n_epochs=5, validation_loader=loader_val).fetch_model()
            # Analysis step 1: Find number of output states to use
            # (drop the smallest singular value, store the rest from index 1)
            sv = get_singular_values(vampnet, loader_val)[:-1]
            for j, val in enumerate(sv, start=1):
                all_sv[j, state_i, run] = val
            # Final validation VAMP score for this run
            run_scores.append(vampnet.validation_scores[-1][1])
        vamp_scores.append(run_scores)
# Plot mean validation VAMP score vs. number of output states; the code plots
# the state count against itself as the labelled theoretical maximum.
if True or output_states is None:
    mean_scores = np.average(vamp_scores, axis=1)
    std_scores = np.std(vamp_scores, axis=1)
    plt.plot(all_output_states, all_output_states, 'r--', label='Theoretical Max. Score')
    plt.plot(all_output_states, mean_scores, label='Average Score')
    #plt.fill_between(all_output_states, mean_scores - std_scores, mean_scores + std_scores, alpha=0.2)
    plt.xticks(all_output_states)
    plt.xlabel('Number of Output States')
    plt.ylabel('VAMP Score')
    plt.legend()
    plt.savefig(join(hdf5_dir, 'vampnet_scores_new.png'))
    #plt.clf()
# NOTE(review): the figure above is saved as 'vampnet_scores_new.png' but the
# image displayed here is the older 'vampnet_scores.png' -- confirm intentional.
Image(filename=join(hdf5_dir, 'vampnet_scores.png'))
# Plot the run-averaged singular values against the number of output states
if output_states is None:
    mean_sv = np.average(all_sv, axis=2)
    for sv_vals in mean_sv:
        plt.plot(all_output_states, sv_vals)
    plt.xticks(all_output_states)
    plt.xlabel('Number of Output States')
    plt.ylabel('Singular Value Magnitude')
    plt.savefig(join(hdf5_dir, 'vampnet_sv.png'))
    plt.clf()
Image(filename=join(hdf5_dir, 'vampnet_sv.png'))
from torch.utils.data import SubsetRandomSampler, Subset
def get_model(dataset, train_ids, test_ids):
    """Train one VAMPNet on the given train/test index split.

    Uses the module-level N_FEATURES, output_states and device globals.
    Returns the (vampnet estimator, fitted model) pair.
    """
    net = nn.Sequential(nn.Linear(N_FEATURES, 20),
                        nn.ELU(),
                        nn.Linear(20, output_states),
                        nn.Softmax(dim=1))  # fuzzy probability distribution over output states
    net = net.to(device=device)
    train_subset = Subset(dataset, train_ids)
    val_subset = Subset(dataset, test_ids)
    train_loader = DataLoader(train_subset, batch_size=10000)
    # Single full-size validation batch
    val_loader = DataLoader(val_subset, batch_size=len(val_subset))
    vampnet = VAMPNet(lobe=net, learning_rate=5e-3, device=device)
    model = vampnet.fit(train_loader, n_epochs=5, validation_loader=val_loader).fetch_model()
    return vampnet, model
from sklearn.model_selection import KFold
# 5-fold cross-validation; the worst-scoring model is kept as a
# conservative choice of final model.
splits = 5
k_fold = KFold(n_splits=splits, shuffle=True)
val_scores = np.full(splits, np.nan)
for i, (train_ids, test_ids) in enumerate(k_fold.split(dataset)):
    vampnet_trial, model_trial = get_model(dataset, train_ids, test_ids)
    # Last validation entry; assumes validation_scores is a 2-D array of
    # (step, score) rows -- TODO confirm against deeptime.
    val_score = vampnet_trial.validation_scores[-1,-1]
    # Keep worst-performing model
    if i == 0 or val_score < np.nanmin(val_scores):
        vampnet = vampnet_trial
        model = model_trial
    val_scores[i] = val_score
plt.bar(np.arange(1, splits+1), val_scores)
plt.title('5-Fold Cross-Validation')
plt.xlabel('Test Data Subset')
plt.ylabel('VAMP-2 Score\n(Max. Score = 7)')
print(np.mean(val_scores))
print(np.std(val_scores))
6.77506742477417 0.019307468708692196
# Project every transition pair onto the learned output states.
# NOTE(review): `output_states` here holds whatever value the selection scan
# left bound (its last scanned value), not None -- confirm it matches the
# trained lobe's output width.
projections = [model.transform(traj) for traj in features]
min_dist_by_state = [[] for i in range(output_states)]
for i in range(len(projections)):
    # Hard-assign each transition's end frame to its most probable state
    state = np.argmax(projections[i][1])
    min_dist_by_state[state].append(min_dists_trajs[i][1][0])
# Average normalized min-dist per output state
avg_min_dist_by_state = [np.average(state_proj) for state_proj in min_dist_by_state]
# Cluster frames in the space of fuzzy VAMPNet state memberships
N_CLUSTERS = 10
FRAME_CLUSTERS_PATH = join(hdf5_dir, 'frame_clusters.pkl')
CLUSTER_CENTERS_PATH = join(hdf5_dir, 'cluster_centers.pkl')
try:
    # NOTE(review): this unconditional raise deliberately bypasses the pickle
    # cache below, forcing a fresh clustering on every run; remove it to
    # restore caching.
    raise FileNotFoundError
    with open(FRAME_CLUSTERS_PATH, 'rb') as f:
        frame_clusters = pickle.load(f)
    with open(CLUSTER_CENTERS_PATH, 'rb') as f:
        cluster_centers = pickle.load(f)
except FileNotFoundError:
    # Cluster using the projections of each frame onto the output states
    # Note: this is using the fuzzy memberships
    cluster = MiniBatchKMeans(N_CLUSTERS).fit_fetch(projections)
    # clusters are stored as starting frame -> finishing frame
    # TODO Increase lag time to look at longer transitions?
    frame_clusters = [cluster.transform(x) for x in projections]
    cluster_centers = cluster.cluster_centers
    with open(FRAME_CLUSTERS_PATH, 'wb') as f:
        pickle.dump(frame_clusters, f)
    with open(CLUSTER_CENTERS_PATH, 'wb') as f:
        pickle.dump(cluster_centers, f)
# The bound state is considered to be the state where the avg. RMSD between the peptide and the initial (bound) peptide is the smallest
# Conversely, the unbound state is considered to be the state where the avg. RMSD between the peptide and the initial (bound) peptide is the largest
# Weight-averaged (normalized) min-dist per cluster, using walker weights
cluster_dists = np.zeros(N_CLUSTERS)
cluster_weights = np.zeros(N_CLUSTERS)
for i in range(len(sw)):
    # sw[i][1] is the (run, walker, cycle) trace of the transition's end frame
    _, walker, cycle = sw[i][1]
    cluster_dists[frame_clusters[i][1]] += (min_dists[walker][cycle] * weights[walker][cycle][0])
    cluster_weights[frame_clusters[i][1]] += weights[walker][cycle][0]
# Calc. avg. min. distance from peptide to RAMP/CLR
# NOTE(review): an unpopulated cluster yields NaN here (0/0); NaN is skipped
# by the `dist > 0` guard below but would confuse np.argmax -- confirm all
# clusters are populated.
cluster_dists /= cluster_weights
bound_cluster = None
for i, dist in enumerate(cluster_dists):
    # NOTE(review): min_dists was normalized, so cluster averages can be
    # negative; the `dist > 0` guard then excludes them from being the
    # bound cluster -- verify this is intended.
    if dist > 0 and (bound_cluster is None or dist < cluster_dists[bound_cluster]):
        bound_cluster = i
#bound_cluster = np.argmin(cluster_dists)
unbound_cluster = np.argmax(cluster_dists)
# (disabled) textual dump of the bound/unbound cluster center projections
if False:
    print('Bound cluster projection values:')
    for state, proj in enumerate(cluster_centers[bound_cluster]):
        print(f'\tState {state}: {proj}')
    print('\nUnbound cluster projection values:')
    for state, proj in enumerate(cluster_centers[unbound_cluster]):
        print(f'\tState {state}: {proj}')
    print('\nDifference')
    for state, proj in enumerate(cluster_centers[bound_cluster] - cluster_centers[unbound_cluster]):
        print(f'\tState {state}: {proj}')
# Side-by-side bars comparing bound vs. unbound cluster centers per state
states = np.arange(cluster_centers.shape[1])
plt.bar(states - 0.2, cluster_centers[bound_cluster], 0.4, label='Bound cluster')
plt.bar(states + 0.2, cluster_centers[unbound_cluster], 0.4, label='Unbound cluster')
plt.legend()
plt.xlabel('Output State')
plt.ylabel('Avg. Projection')
plt.show()
plt.clf()
<Figure size 640x480 with 0 Axes>
# Extract up to 50 representative frames each for the bound and unbound
# clusters as PDB trajectories (cached on disk).
BOUND_TRAJ_PATH = join(hdf5_dir, 'bound_traj.pdb')
UNBOUND_TRAJ_PATH = join(hdf5_dir, 'unbound_traj.pdb')
try:
    bound_trajectory = mdj.load_pdb(BOUND_TRAJ_PATH)
    unbound_trajectory = mdj.load_pdb(UNBOUND_TRAJ_PATH)
except FileNotFoundError:
    # Collect the (run, walker, cycle) traces of the end frames assigned to
    # the bound / unbound clusters
    bound_frames = []
    unbound_frames = []
    for i in range(len(sw)):
        f_cluster = frame_clusters[i][1]
        if f_cluster == bound_cluster:
            bound_frames.append(sw[i][1])
        elif f_cluster == unbound_cluster:
            unbound_frames.append(sw[i][1])
    try:
        we = WepyHDF5(hdf5_path,'r')
    except (IOError, AttributeError, OSError) as e:
        # Sometimes if the Python script exits prematurely, it does not reset the HDF5
        # file's flag. This manually resets it, allowing the file to be opened.
        os.system(f'h5clear -s {hdf5_path}')
        we = WepyHDF5(hdf5_path,'r')
    with we:
        # Cap each trajectory at 50 frames to keep the PDBs small
        if len(bound_frames) > 50:
            bound_frames = bound_frames[:50]
        if len(unbound_frames) > 50:
            unbound_frames = unbound_frames[:50]
        bound_trajectory = we.trace_to_mdtraj(bound_frames)
        unbound_trajectory = we.trace_to_mdtraj(unbound_frames)
        bound_fields = we.get_trace_fields(bound_frames, ['observables/aligned_images'])
        unbound_fields = we.get_trace_fields(unbound_frames, ['observables/aligned_images'])
    # Replace the raw coordinates with the re-imaged, aligned ones
    bound_trajectory.xyz = bound_fields['observables/aligned_images']
    unbound_trajectory.xyz = unbound_fields['observables/aligned_images']
    bound_trajectory.save_pdb(BOUND_TRAJ_PATH, force_overwrite=True)
    unbound_trajectory.save_pdb(UNBOUND_TRAJ_PATH, force_overwrite=True)
# Re-display the initial (bound) pose of the current system for reference
initial_pose = nglview.show_mdtraj(PDBS[sys], default_representation=False)
initial_pose.add_cartoon(RAMP_SELECTIONS[sys], color='purple') # RAMP
initial_pose.add_cartoon(CLR_SELECTIONS[sys], color='orange') # CLR
initial_pose.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red') # Peptide
initial_pose.center()
initial_pose
NGLWidget()
from nglview.contrib.movie import MovieMaker
# Animate the sampled bound-cluster frames and export them as a GIF
bound_view = nglview.show_mdtraj(bound_trajectory, default_representation=False)
bound_view.add_cartoon(RAMP_SELECTIONS[sys], color='purple') # RAMP
bound_view.add_cartoon(CLR_SELECTIONS[sys], color='orange') # CLR
bound_view.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red') # Peptide
bound_view.center()
# The GIF is written relative to the current working directory (printed below)
bound_movie = MovieMaker(bound_view, output='bound_images.gif', in_memory=True)
print(os.getcwd())
bound_movie.make()
bound_view
/dickson/s2/jim/repos/bmb-961-machine-learning-for-molecular-dynamics/Project/src
NGLWidget(max_frame=49)
# Interactive view of the sampled unbound-cluster frames
unbound_view = nglview.show_mdtraj(unbound_trajectory, default_representation=False)
unbound_view.add_cartoon(RAMP_SELECTIONS[sys], color='purple') # RAMP
unbound_view.add_cartoon(CLR_SELECTIONS[sys], color='orange') # CLR
unbound_view.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red') # Peptide
unbound_view.center()
unbound_view
NGLWidget(max_frame=49)
if True:
    # Construct the markov state model
    bmsm = BayesianMSM(lagtime=1, maxerr=0.1).fit_fetch(frame_clusters)
    # For this analysis, extremely unlikely transitions (defined as transitions that occured less than 3 times) are ignored
    counts_mat = bmsm.prior.count_model.count_matrix_full
    # NOTE(review): zeroing entries here mutates the array returned by the
    # transition_matrix property; if that is the model's internal array the
    # MSM itself is modified -- confirm.
    trim_trans_mat = bmsm.prior.transition_matrix
    trim_trans_mat[counts_mat < 3] = 0
    #Finally, calculate the committor for each state
    # NOTE(review): committor() documents A and B as collections of state
    # indices; scalar indices are passed here -- TODO confirm this is accepted.
    committors = committor(trim_trans_mat, A=bound_cluster, B=unbound_cluster)
# Find avg. change in committor for each cluster (using the final state)
cluster_avg_features = np.zeros((N_CLUSTERS, N_FEATURES))
counts = np.zeros_like(committors)
# TODO weight by frame weight
# Find avg. d_committor and avg. features for each cluster
for i in range(len(frame_clusters)):
    cluster = frame_clusters[i][1]
    cluster_avg_features[cluster] += features[i][1]
    counts[cluster] += 1
for i in range(len(counts)):
    cluster_avg_features[i] /= counts[i]
pass
from deeptime.plots import plot_markov_model
from matplotlib.colors import Normalize
from matplotlib import cm
# plot_markov_model() requires the row sums to be within a certain error tolerance.
# Rescale each row so its elements sum to one. This is for visualization purposes only.
viz_trim_trans_mat = np.copy(trim_trans_mat)
for i in range(viz_trim_trans_mat.shape[0]):
    row_sum = np.sum(viz_trim_trans_mat[i])
    # Guard against all-zero rows (possible after trimming rare transitions),
    # which would otherwise become NaN and break the plot.
    if row_sum > 0:
        viz_trim_trans_mat[i] /= row_sum
# Color each state by its weighted-average min-dist
norm = Normalize(vmin=np.min(cluster_dists), vmax=np.max(cluster_dists), clip=True)
# Pass the colormap by name: cm.get_cmap() is deprecated and removed in
# Matplotlib 3.9; ScalarMappable accepts a registered colormap name directly.
mapper = cm.ScalarMappable(norm=norm, cmap='coolwarm')
plot_markov_model(viz_trim_trans_mat, state_colors=mapper.to_rgba(cluster_dists))
[[0.35336916 0.47206866 0.8925704 1. ] [0.2526626 0.33283679 0.78366503 1. ] [0.2298057 0.29871797 0.75368315 1. ] [0.33837651 0.45281861 0.87931708 1. ] [0.36850661 0.49114119 0.90524313 1. ] [0.35841498 0.47842617 0.89679465 1. ] [0.30906032 0.41349827 0.85012763 1. ] [0.4148009 0.54687353 0.93908753 1. ] [0.70567316 0.01555616 0.15023281 1. ] [0.33349048 0.44626522 0.87445217 1. ]]
(<AxesSubplot:>, {0: array([-0.4221958 , 0.24621369]), 1: array([ 1. , -0.45517761]), 2: array([-0.38633693, -0.3983539 ]), 3: array([0.24604885, 0.02518475]), 4: array([-0.16262182, 0.30198685]), 5: array([-0.50871346, -0.1639395 ]), 6: array([ 0.76504383, -0.22023708]), 7: array([-0.24291366, -0.15079961]), 8: array([-0.12232792, 0.79307721]), 9: array([-0.16598308, 0.02204521])})