import os
from os.path import join
from matplotlib import pyplot as plt
import mdtraj as mdj
import numpy as np
import pickle
import torch
import torch.nn as nn
from deeptime.util.data import TrajectoriesDataset
from deeptime.decomposition.deep import VAMPNet, koopman_matrix
from deeptime.clustering import MiniBatchKMeans
from deeptime.markov.msm import BayesianMSM
from deeptime.markov.tools.analysis import committor
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm # progress bar
from IPython.display import Image
import nglview
from wepy.hdf5 import WepyHDF5
from wepy.boundary_conditions.unbinding import UnbindingBC
from wepy.analysis.contig_tree import ContigTree
from wepy.resampling.decisions.clone_merge import MultiCloneMergeDecision
from wepy.util.util import traj_box_vectors_to_lengths_angles
from geomm.rmsd import calc_rmsd
from geomm.grouping import group_pair
from geomm.superimpose import superimpose
from geomm.centering import center_around
def open_wepy(wepy_h5):
    """Open a WepyHDF5 handle if it is not already open.

    Returns True when this call performed the open (the caller then owns
    closing it); False when `open()` raised IOError, i.e. the handle was
    already open.
    """
    try:
        wepy_h5.open()
    except IOError:
        return False
    return True
def get_observables(wepy_h5):
    """Return the observable field names stored in the wepy HDF5 file.

    Opens the file if needed and closes it again only when this call was
    the one that opened it. Returns an empty list when the observables
    group is missing or malformed.
    """
    we_opened_it = open_wepy(wepy_h5)
    try:
        names = wepy_h5.observable_field_names
    except (KeyError, TypeError):
        names = []
    if we_opened_it:
        wepy_h5.close()
    return names
def compute_aligned_images(fields, *args):
    """Observable function: re-image and align each frame of one trajectory.

    Parameters (wepy compute_observable convention):
        fields: dict with 'positions' (n_frames, n_atoms, 3) and
            'box_vectors' (n_frames, 3, 3).
        args[0]: system index selecting entries from the module globals
            A_IDXS, B_IDXS, LIGAND_IDXS, AB_IDXS, AB_LIGAND_IDXS, PDBS.

    Returns the aligned positions with the same shape as 'positions'.
    """
    sys_idx = args[0]
    aligned_pos = np.asarray(fields['positions'])
    # Box edge lengths taken from the diagonal of each frame's box-vector
    # matrix (assumes an orthorhombic box -- TODO confirm).
    bvs = np.diagonal(fields['box_vectors'], axis1=1, axis2=2)
    for i in range(aligned_pos.shape[0]):
        # Group CLR and RAMP
        aligned_pos[i] = group_pair(aligned_pos[i], bvs[i], A_IDXS[sys_idx], B_IDXS[sys_idx])
        # Then group peptide to CLR/RAMP complex
        aligned_pos[i] = group_pair(aligned_pos[i], bvs[i], LIGAND_IDXS[sys_idx], AB_IDXS[sys_idx])
        aligned_pos[i] = center_around(aligned_pos[i], AB_LIGAND_IDXS[sys_idx])
        # Superimpose the CLR/RAMP/peptide complex onto the initial pose using only the CLR and RAMP atomic positions for alignment
        aligned_pos[i], _, _ = superimpose(PDBS[sys_idx].xyz[0], aligned_pos[i], idxs=AB_IDXS[sys_idx])
    return aligned_pos
def load_traj_field(wepy_h5, field_name):
    """Read `field_name` from every trajectory of run 0.

    Returns a list with one NumPy array per trajectory (trajectories may
    differ in length, so they are not stacked into a single array).
    """
    we_opened_it = open_wepy(wepy_h5)
    n_trajs = wepy_h5.num_run_trajs(0)
    n_cycles = wepy_h5.num_run_cycles(0)  # queried as in the original; unused below
    values = []
    for traj_idx in range(n_trajs):
        dataset = wepy_h5.h5[f'runs/{0}/trajectories/{traj_idx}/{field_name}']
        values.append(np.asarray(dataset))
    if we_opened_it:
        wepy_h5.close()
    return values
def align_pdb(sys, mdj_traj):
    """Re-image the first frame of an MDTraj trajectory for system `sys`.

    Groups CLR with RAMP, then the peptide with the CLR/RAMP complex, and
    finally centers the whole complex in the box.

    Returns the re-imaged (n_atoms, 3) positions of frame 0.
    """
    pos = mdj_traj.xyz[0]
    bvs = mdj_traj.unitcell_lengths[0]
    # Group CLR and RAMP
    pos = group_pair(pos, bvs, A_IDXS[sys], B_IDXS[sys])
    # Then group peptide to CLR/RAMP complex
    pos = group_pair(pos, bvs, LIGAND_IDXS[sys], AB_IDXS[sys])
    # BUG FIX: index the per-system atom list. The original passed the whole
    # list of index arrays (AB_LIGAND_IDXS), unlike compute_aligned_images
    # which correctly uses AB_LIGAND_IDXS[sys_idx].
    pos = center_around(pos, AB_LIGAND_IDXS[sys])
    return pos
def compute_ligand_backbone_dihedrals(fields, *args):
    """Observable function: sin/cos-encoded peptide backbone dihedrals.

    args[0] is an MDTraj topology, args[1] the atom quartets defining each
    dihedral. Returns an array of shape (n_frames, 2 * n_dihedrals) with
    sin/cos pairs interleaved per angle.
    """
    topology, ligand_quartlets = args[0], args[1]
    unitcell_lengths, unitcell_angles = traj_box_vectors_to_lengths_angles(fields['box_vectors'])
    traj = mdj.Trajectory(fields['observables/aligned_images'],
                          topology,
                          unitcell_lengths=unitcell_lengths,
                          unitcell_angles=unitcell_angles)
    dihedrals = mdj.compute_dihedrals(traj, indices=ligand_quartlets)
    # Encode each angle as a (sin, cos) pair so the periodic wrap at +/- pi
    # does not create artificial discontinuities in feature space.
    n_frames, n_angles = dihedrals.shape
    encoded = np.empty((n_frames, 2 * n_angles))
    encoded[:, 0::2] = np.sin(dihedrals)
    encoded[:, 1::2] = np.cos(dihedrals)
    return encoded
def compute_residue_rmsds(fields, *args):
    """Compute per-residue backbone RMSDs against the initial conformation.

    args[0]: list of atom-index arrays, one per residue.
    args[1]: list of reference positions for those residues (initial frame).

    Returns an array of shape (n_frames, n_residues).
    """
    residues_indices, init_residue_pos = args[0], args[1]
    aligned_images = np.asarray(fields['observables/aligned_images'])
    n_frames = aligned_images.shape[0]
    n_residues = len(residues_indices)
    rmsds = np.empty((n_frames, n_residues))
    for frame in range(n_frames):
        for res in range(n_residues):
            rmsds[frame, res] = calc_rmsd(init_residue_pos[res],
                                          aligned_images[frame, residues_indices[res]])
    return rmsds
def normalize_features(features):
    """Standardize each feature to zero mean and unit variance.

    `features` is array-like of shape (n_trajs, n_cycles, n_features);
    statistics are taken jointly over all trajectories and cycles.

    Returns a new float array. Fixes two defects in the original:
    np.asarray() does not copy an existing ndarray, so the in-place
    `-=`/`/=` silently mutated the caller's data, and integer inputs
    raised on in-place float division. A constant feature (std == 0)
    still divides by zero, matching the original behavior.
    """
    features = np.array(features, dtype=np.float64)  # explicit copy, float math
    mean = features.mean(axis=(0, 1))
    std = features.std(axis=(0, 1))
    return (features - mean) / std
def get_singular_values(vampnet, loader):
    """Estimate the Koopman singular values of a trained VAMPNet.

    The Koopman matrix is computed per batch from the lobe outputs; its
    diagonal is averaged over batches and returned sorted in descending
    order.
    """
    singular_values = []
    for batch_0, batch_t in loader:
        x_0 = vampnet.lobe(batch_0.to(device=vampnet.device))
        x_t = vampnet.lobe_timelagged(batch_t.to(device=vampnet.device))
        koopman_batch = koopman_matrix(x_0, x_t, epsilon=vampnet.epsilon, mode=vampnet.score_mode)
        # BUG FIX: .data.numpy() raises for CUDA tensors; detach and move to
        # host explicitly so this works on both CPU and GPU devices.
        singular_values.append(np.diagonal(koopman_batch.detach().cpu().numpy()))
    avg_sv = np.average(singular_values, axis=0)
    # Descending order
    return np.sort(avg_sv)[::-1]
def get_selection(idxs):
    """Build an NGLView '@'-style atom selection string from atom indices.

    Returns '' for an empty index list (the original stripped the trailing
    character from a bare '@', yielding the empty string).
    """
    body = ','.join(f'{idx}' for idx in idxs)
    return f'@{body}' if body else ''
def load_inputs(hdf5_path):
    """Load (or compute and cache) all VAMPNet input features for one run.

    For each feature set (ligand backbone dihedrals, binding-site residue
    RMSDs, peptide min-distance, native-contact distances, walker weights)
    the lookup order is: pickle cache in `hdf5_dir`, then the wepy HDF5
    observables, then recompute and save back to the HDF5 file.

    NOTE(review): depends on module-level globals (hdf5_dir, sys, PDBS,
    ligand_quartlets, residues_indices, init_residue_pos) -- they must be
    set before calling.

    Returns:
        (sw, dihedral_trajs, rmsds_trajs, min_dists, min_dists_trajs,
         native_dists_trajs, weights)
    """
    try:
        we = WepyHDF5(hdf5_path,'r+')
    except (IOError, AttributeError, OSError) as e:
        # Sometimes if the Python script exits prematurely, it does not reset the HDF5
        # file's flag. This manually resets it, allowing the file to be opened.
        os.system(f'h5clear -s {hdf5_path}')
        we = WepyHDF5(hdf5_path,'r+')
    with we:
        obs = get_observables(we)
        contig_tree = ContigTree(wepy_h5=we,
                                 decision_class=MultiCloneMergeDecision,
                                 boundary_condition_class=UnbindingBC)
        # Use the sliding window method to load all transitions between frames
        # Note: The parameter '2' indicates that the lag time used is 1
        sw = contig_tree.sliding_windows(2)
        if 'aligned_images' not in obs:
            print('Computing aligned_images and saving to HDF5 file')
            we.compute_observable(compute_aligned_images,
                                  ['positions','box_vectors'],
                                  (sys,),
                                  save_to_hdf5='aligned_images')
        # Load the normalized dihedral features
        DIHEDRALS_PATH = join(hdf5_dir, 'dihedral_trajs.pkl')
        # First try to load the pickled precomputed values
        try:
            with open(DIHEDRALS_PATH, 'rb') as f:
                dihedral_trajs = pickle.load(f)
        except FileNotFoundError:
            # If the pickle file isn't found, try to load them from the HDF5 file
            if 'ligand_backbone_dihedrals' in obs:
                print('Loading ligand_backbone_dihedrals from HDF5 file')
                dihedrals = load_traj_field(we, 'observables/ligand_backbone_dihedrals')
            else:
                # If the features haven't been computed, calculate them and save to the HDF5 file
                print('Computing ligand_backbone_dihedrals and saving to HDF5 file')
                dihedrals = we.compute_observable(compute_ligand_backbone_dihedrals,
                                                  ['observables/aligned_images','box_vectors'],
                                                  (PDBS[sys].topology, ligand_quartlets),
                                                  save_to_hdf5='ligand_backbone_dihedrals')
            dihedrals = normalize_features(dihedrals)
            # Transform the dihedrals from features of a frame to features of a trajectory
            # Each sliding-window pair (s, f) is a (run, walker, cycle) trace;
            # s[1:]/f[1:] drop the run index, leaving a (walker, cycle) index.
            dihedral_trajs = [np.array((dihedrals[s[1:]], dihedrals[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(DIHEDRALS_PATH, 'wb') as f:
                pickle.dump(dihedral_trajs, f)
        # Load the normalized residue RMSD features
        # These are for residues near the binding site that appear to interact with the peptide,
        # the RMSD is calculated between each frame and the initial, bound conformation
        RMSDS_PATH = join(hdf5_dir, 'rmsds_trajs.pkl')
        try:
            with open(RMSDS_PATH, 'rb') as f:
                rmsds_trajs = pickle.load(f)
        except FileNotFoundError:
            if 'residue_rmsds' in obs:
                rmsds = load_traj_field(we, 'observables/residue_rmsds')
            else:
                print('Computing residue_rmsds and saving to HDF5 file')
                rmsds = we.compute_observable(compute_residue_rmsds,
                                              ['observables/aligned_images'],
                                              (residues_indices, init_residue_pos),
                                              save_to_hdf5='residue_rmsds')
            rmsds = normalize_features(rmsds)
            rmsds_trajs = [np.array((rmsds[s[1:]], rmsds[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(RMSDS_PATH, 'wb') as f:
                pickle.dump(rmsds_trajs, f)
        # Minimum peptide-to-complex distance feature
        MIN_DIST_PATH = join(hdf5_dir, 'min_dists.pkl')
        MIN_DIST_TRAJ_PATH = join(hdf5_dir, 'min_dist_trajs.pkl')
        try:
            with open(MIN_DIST_PATH, 'rb') as f:
                min_dists = pickle.load(f)
            with open(MIN_DIST_TRAJ_PATH, 'rb') as f:
                min_dists_trajs = pickle.load(f)
        except FileNotFoundError:
            assert 'min-dist' in obs, "Missing 'min-dist' observable in wepy HDF5 file"
            min_dists = load_traj_field(we, 'observables/min-dist')
            # min-dist is scalar per frame; add a trailing feature axis so it
            # matches the (traj, cycle, feature) layout normalize_features expects
            min_dists = np.expand_dims(min_dists, axis=-1)
            min_dists = normalize_features(min_dists)
            with open(MIN_DIST_PATH, 'wb') as f:
                pickle.dump(min_dists, f)
            min_dists_trajs = [np.array((min_dists[s[1:]], min_dists[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(MIN_DIST_TRAJ_PATH, 'wb') as f:
                pickle.dump(min_dists_trajs, f)
        # Native-contact distance features
        NATIVE_DISTS_PATH = join(hdf5_dir, 'native_dists_trajs.pkl')
        try:
            with open(NATIVE_DISTS_PATH, 'rb') as f:
                native_dists_trajs = pickle.load(f)
        except FileNotFoundError:
            assert 'native-dists' in obs, "Missing 'native-dists' observable in wepy HDF5 file"
            native_dists = load_traj_field(we, 'observables/native-dists')
            native_dists = normalize_features(native_dists)
            native_dists_trajs = [np.array((native_dists[s[1:]], native_dists[f[1:]]), dtype=np.float32) for s, f in sw]
            with open(NATIVE_DISTS_PATH, 'wb') as f:
                pickle.dump(native_dists_trajs, f)
        # Per-walker, per-cycle statistical weights from the WE resampling
        print('Loading weights')
        WEIGHTS_PATH = join(hdf5_dir, 'weights.pkl')
        try:
            print('Loading weights from pickle file')
            with open(WEIGHTS_PATH, 'rb') as f:
                weights = pickle.load(f)
        except FileNotFoundError:
            print('Loading weights from HDF5 file')
            weights = load_traj_field(we, 'weights')
            with open(WEIGHTS_PATH, 'wb') as f:
                pickle.dump(weights, f)
        print('Loaded weights')
    return sw, dihedral_trajs, rmsds_trajs, min_dists, min_dists_trajs, native_dists_trajs, weights
# Filesystem layout: per-system REVO simulation output (HDF5) and the
# CHARMM-GUI build PDBs used as the reference (initial/bound) structures.
base_path = '/dickson/s2/jim/repos/clr/alexrd/home'
base_path2 = '/dickson/s2/jim/repos/bmb-961-machine-learning-for-molecular-dynamics/Project/data'
sys_folders = ['sys1','sys2_AMwt_CLR_R2','sys3_AM2','sys4_CGRP_4mut_CLR_R1','sys5_AMmut']
cg_folders = ['charmm-gui-8162111267','charmm-gui-8163055957','charmm-gui-8169421429','charmm-gui-8162396385','charmm-gui-8169578268']
HDF5_BASE_PATHS = [join(base_path2,sys_folders[i],'REVO') for i in range(5)]
PDB_PATHS = [join(base_path,sys_folders[i],cg_folders[i],'openmm/step3_charmm2omm.pdb') for i in range(5)]
print(PDB_PATHS)
PDBS = [mdj.load_pdb(p) for p in PDB_PATHS]
# Per-system atom index groups; judging from the colored views below,
# PROA is RAMP, PROB is CLR and PROC is the peptide -- TODO confirm.
A_IDXS = [p.top.select('segname PROA') for p in PDBS]
B_IDXS = [p.top.select('segname PROB') for p in PDBS]
LIGAND_IDXS = [p.top.select('segname PROC') for p in PDBS]
AB_IDXS = [p.top.select('segname PROA PROB') for p in PDBS]
AB_LIGAND_IDXS = [p.top.select('segname PROA PROB PROC') for p in PDBS]
# Binding-site residue ids used for the per-residue RMSD features
res_ids = (2072, 2092, 2094, 2119, 2121)
# NGLView '@index' selection strings for the display cells below
RAMP_SELECTIONS = [get_selection(idxs) for idxs in A_IDXS ]
CLR_SELECTIONS = [get_selection(idxs) for idxs in B_IDXS ]
LIGAND_SELECTIONS = [get_selection(idxs) for idxs in LIGAND_IDXS ]
['/dickson/s2/jim/repos/clr/alexrd/home/sys1/charmm-gui-8162111267/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys2_AMwt_CLR_R2/charmm-gui-8163055957/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys3_AM2/charmm-gui-8169421429/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys4_CGRP_4mut_CLR_R1/charmm-gui-8162396385/openmm/step3_charmm2omm.pdb', '/dickson/s2/jim/repos/clr/alexrd/home/sys5_AMmut/charmm-gui-8169578268/openmm/step3_charmm2omm.pdb']
# Display the initial (bound) pose of system 0 ('CGRP')
sys = 0
initial_pose = nglview.show_mdtraj(PDBS[sys], default_representation=False)
initial_pose.add_cartoon(RAMP_SELECTIONS[sys], color='purple')   # RAMP
initial_pose.add_cartoon(CLR_SELECTIONS[sys], color='orange')    # CLR
initial_pose.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red')  # peptide
initial_pose.center()
print('CGRP')
initial_pose
CGRP
NGLWidget()
# Display the initial (bound) pose of system 3 ('ss-CGRP')
sys = 3
initial_pose = nglview.show_mdtraj(PDBS[sys], default_representation=False)
initial_pose.add_cartoon(RAMP_SELECTIONS[sys], color='purple')   # RAMP
initial_pose.add_cartoon(CLR_SELECTIONS[sys], color='orange')    # CLR
initial_pose.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red')  # peptide
initial_pose.center()
print('ss-CGRP')
initial_pose
ss-CGRP
NGLWidget()
# Prefer the GPU when available; otherwise pin the CPU thread count.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    # Let cuDNN benchmark and pick the fastest kernels for the fixed input sizes
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device("cpu")
    torch.set_num_threads(12)
print(f"Using device {device}")
Using device cpu
# Systems are 0:CGRP and 3:ss-CGRP
#sys = 0
sys = 3
#rep = 0
rep = 2
# output_states=None triggers the model-selection scan below;
# a fixed integer skips it
#output_states = 7
output_states = None
# Sliding windows of 4 consecutive backbone atoms define the peptide dihedrals
ligand_backbone_idxs = PDBS[sys].top.select('segname PROC and backbone')
ligand_quartlets = [ligand_backbone_idxs[i:i+4] for i in range(len(ligand_backbone_idxs) - 4 + 1)]
# Note: MDTraj's 'residue' is equivalent to VMD's 'resid'
residues_indices = [PDBS[sys].top.select(f'residue {res_id} and backbone') for res_id in res_ids]
# Reference (frame 0) positions of each binding-site residue's backbone
init_residue_pos = [PDBS[sys].xyz[0][residue_indices] for residue_indices in residues_indices]
hdf5_dir = join(HDF5_BASE_PATHS[sys],f'tail{rep}_0')
hdf5_path = join(HDF5_BASE_PATHS[sys],f'tail{rep}_0',f'wepy.resultstail{rep}_0.h5')
print("Loading",hdf5_path,"...")
sw, dihedral_trajs, rmsds_trajs, min_dists, min_dists_trajs, native_dists_trajs, weights = load_inputs(hdf5_path)
# Concatenate all feature sets into one vector per frame; each entry i holds
# the (t, t+lag) feature pair for one sliding-window transition
N_FEATURES = len(dihedral_trajs[0][0]) + len(rmsds_trajs[0][0]) + len(min_dists_trajs[0][0]) + len(native_dists_trajs[0][0])
features_arr = np.empty((len(dihedral_trajs), 2, N_FEATURES), dtype=np.float32)
for i in range(features_arr.shape[0]):
    for j in range(2):
        features_arr[i,j] = np.concatenate((dihedral_trajs[i][j], rmsds_trajs[i][j], min_dists_trajs[i][j], native_dists_trajs[i][j]), dtype=np.float32)
# DeepTime's TrajectoriesDataset expects a list of NumPy arrays as the collection of trajectories
features = [val for val in features_arr]
Loading /dickson/s2/jim/repos/bmb-961-machine-learning-for-molecular-dynamics/Project/data/sys4_CGRP_4mut_CLR_R1/REVO/tail2_0/wepy.resultstail2_0.h5 ... Loading weights Loading weights from pickle file Loaded weights
# Build the lagged dataset and randomly hold out 30% of transitions for validation
dataset = TrajectoriesDataset.from_numpy(lagtime=1, data=features)
n_val = int(len(dataset)*.3)
train_data, val_data = torch.utils.data.random_split(dataset, [len(dataset)-n_val, n_val])
pass
# Model selection: scan the number of VAMPNet output states, training
# n_runs independent models per setting and recording validation VAMP
# scores and estimated Koopman singular values.
if output_states is None:
    n_runs = 3 #5
    all_output_states = np.arange(2, 15+1)
    #all_output_states = np.arange(2, 4+1)
    vamp_scores = []
    all_sv_len = all_output_states[-1] - all_output_states[0] + 1
    # all_sv[j, state_i, run] = j-th singular value for the state_i-th
    # output-state setting; unfilled entries stay NaN
    all_sv = np.full((np.max(all_output_states), all_sv_len, n_runs), np.nan)
    all_sv[0] = 1
    # NOTE(review): this loop rebinds the global `output_states`; after the
    # scan it is left at the last scanned value, not None.
    for state_i, output_states in enumerate(all_output_states):
        run_scores = []
        for run in range(n_runs):
            lobe = nn.Sequential(
                nn.Linear(N_FEATURES, 20), nn.ELU(),
                nn.Linear(20, output_states),
                nn.Softmax(dim=1) # obtain fuzzy probability distribution over output states
            )
            lobe = lobe.to(device=device)
            #print(lobe)
            loader_train = DataLoader(train_data, batch_size=10000, shuffle=True)
            loader_val = DataLoader(val_data, batch_size=len(val_data), shuffle=False)
            vampnet = VAMPNet(lobe=lobe, learning_rate=5e-3, device=device)
            model = vampnet.fit(loader_train, n_epochs=5, validation_loader=loader_val).fetch_model()
            # Analysis step 1: Find number of output states to use
            # (drop the smallest singular value, store the rest from index 1)
            sv = get_singular_values(vampnet, loader_val)[:-1]
            for j, val in enumerate(sv, start=1):
                all_sv[j, state_i, run] = val
            # Final validation VAMP score for this run
            run_scores.append(vampnet.validation_scores[-1][1])
        vamp_scores.append(run_scores)
# Plot mean validation VAMP score vs. number of output states; the code plots
# the state count against itself as the labelled theoretical maximum.
if True or output_states is None:
    mean_scores = np.average(vamp_scores, axis=1)
    std_scores = np.std(vamp_scores, axis=1)
    plt.plot(all_output_states, all_output_states, 'r--', label='Theoretical Max. Score')
    plt.plot(all_output_states, mean_scores, label='Average Score')
    #plt.fill_between(all_output_states, mean_scores - std_scores, mean_scores + std_scores, alpha=0.2)
    plt.xticks(all_output_states)
    plt.xlabel('Number of Output States')
    plt.ylabel('VAMP Score')
    plt.legend()
    plt.savefig(join(hdf5_dir, 'vampnet_scores_new.png'))
    #plt.clf()
# NOTE(review): the figure above is saved as 'vampnet_scores_new.png' but the
# image displayed here is the older 'vampnet_scores.png' -- confirm intentional.
Image(filename=join(hdf5_dir, 'vampnet_scores.png'))
# Plot the run-averaged singular values against the number of output states
if output_states is None:
    mean_sv = np.average(all_sv, axis=2)
    for sv_vals in mean_sv:
        plt.plot(all_output_states, sv_vals)
    plt.xticks(all_output_states)
    plt.xlabel('Number of Output States')
    plt.ylabel('Singular Value Magnitude')
    plt.savefig(join(hdf5_dir, 'vampnet_sv.png'))
    plt.clf()
Image(filename=join(hdf5_dir, 'vampnet_sv.png'))
from torch.utils.data import SubsetRandomSampler, Subset
def get_model(dataset, train_ids, test_ids):
    """Train one VAMPNet on the given train/test index split.

    Uses the module-level N_FEATURES, output_states and device globals.
    Returns the (vampnet estimator, fitted model) pair.
    """
    net = nn.Sequential(nn.Linear(N_FEATURES, 20),
                        nn.ELU(),
                        nn.Linear(20, output_states),
                        nn.Softmax(dim=1))  # fuzzy probability distribution over output states
    net = net.to(device=device)
    train_subset = Subset(dataset, train_ids)
    val_subset = Subset(dataset, test_ids)
    train_loader = DataLoader(train_subset, batch_size=10000)
    # Single full-size validation batch
    val_loader = DataLoader(val_subset, batch_size=len(val_subset))
    vampnet = VAMPNet(lobe=net, learning_rate=5e-3, device=device)
    model = vampnet.fit(train_loader, n_epochs=5, validation_loader=val_loader).fetch_model()
    return vampnet, model
from sklearn.model_selection import KFold
# 5-fold cross-validation; the worst-scoring model is kept as a
# conservative choice of final model.
splits = 5
k_fold = KFold(n_splits=splits, shuffle=True)
val_scores = np.full(splits, np.nan)
for i, (train_ids, test_ids) in enumerate(k_fold.split(dataset)):
    vampnet_trial, model_trial = get_model(dataset, train_ids, test_ids)
    # Last validation entry; assumes validation_scores is a 2-D array of
    # (step, score) rows -- TODO confirm against deeptime.
    val_score = vampnet_trial.validation_scores[-1,-1]
    # Keep worst-performing model
    if i == 0 or val_score < np.nanmin(val_scores):
        vampnet = vampnet_trial
        model = model_trial
    val_scores[i] = val_score
plt.bar(np.arange(1, splits+1), val_scores)
plt.title('5-Fold Cross-Validation')
plt.xlabel('Test Data Subset')
plt.ylabel('VAMP-2 Score\n(Max. Score = 7)')
print(np.mean(val_scores))
print(np.std(val_scores))
6.77506742477417 0.019307468708692196
# Project every transition pair onto the learned output states.
# NOTE(review): `output_states` here holds whatever value the selection scan
# left bound (its last scanned value), not None -- confirm it matches the
# trained lobe's output width.
projections = [model.transform(traj) for traj in features]
min_dist_by_state = [[] for i in range(output_states)]
for i in range(len(projections)):
    # Hard-assign each transition's end frame to its most probable state
    state = np.argmax(projections[i][1])
    min_dist_by_state[state].append(min_dists_trajs[i][1][0])
# Average normalized min-dist per output state
avg_min_dist_by_state = [np.average(state_proj) for state_proj in min_dist_by_state]
# Cluster frames in the space of fuzzy VAMPNet state memberships
N_CLUSTERS = 10
FRAME_CLUSTERS_PATH = join(hdf5_dir, 'frame_clusters.pkl')
CLUSTER_CENTERS_PATH = join(hdf5_dir, 'cluster_centers.pkl')
try:
    # NOTE(review): this unconditional raise deliberately bypasses the pickle
    # cache below, forcing a fresh clustering on every run; remove it to
    # restore caching.
    raise FileNotFoundError
    with open(FRAME_CLUSTERS_PATH, 'rb') as f:
        frame_clusters = pickle.load(f)
    with open(CLUSTER_CENTERS_PATH, 'rb') as f:
        cluster_centers = pickle.load(f)
except FileNotFoundError:
    # Cluster using the projections of each frame onto the output states
    # Note: this is using the fuzzy memberships
    cluster = MiniBatchKMeans(N_CLUSTERS).fit_fetch(projections)
    # clusters are stored as starting frame -> finishing frame
    # TODO Increase lag time to look at longer transitions?
    frame_clusters = [cluster.transform(x) for x in projections]
    cluster_centers = cluster.cluster_centers
    with open(FRAME_CLUSTERS_PATH, 'wb') as f:
        pickle.dump(frame_clusters, f)
    with open(CLUSTER_CENTERS_PATH, 'wb') as f:
        pickle.dump(cluster_centers, f)
# The bound state is considered to be the state where the avg. RMSD between the peptide and the initial (bound) peptide is the smallest
# Conversely, the unbound state is considered to be the state where the avg. RMSD between the peptide and the initial (bound) peptide is the largest
# Weight-averaged (normalized) min-dist per cluster, using walker weights
cluster_dists = np.zeros(N_CLUSTERS)
cluster_weights = np.zeros(N_CLUSTERS)
for i in range(len(sw)):
    # sw[i][1] is the (run, walker, cycle) trace of the transition's end frame
    _, walker, cycle = sw[i][1]
    cluster_dists[frame_clusters[i][1]] += (min_dists[walker][cycle] * weights[walker][cycle][0])
    cluster_weights[frame_clusters[i][1]] += weights[walker][cycle][0]
# Calc. avg. min. distance from peptide to RAMP/CLR
# NOTE(review): an unpopulated cluster yields NaN here (0/0); NaN is skipped
# by the `dist > 0` guard below but would confuse np.argmax -- confirm all
# clusters are populated.
cluster_dists /= cluster_weights
bound_cluster = None
for i, dist in enumerate(cluster_dists):
    # NOTE(review): min_dists was normalized, so cluster averages can be
    # negative; the `dist > 0` guard then excludes them from being the
    # bound cluster -- verify this is intended.
    if dist > 0 and (bound_cluster is None or dist < cluster_dists[bound_cluster]):
        bound_cluster = i
#bound_cluster = np.argmin(cluster_dists)
unbound_cluster = np.argmax(cluster_dists)
# (disabled) textual dump of the bound/unbound cluster center projections
if False:
    print('Bound cluster projection values:')
    for state, proj in enumerate(cluster_centers[bound_cluster]):
        print(f'\tState {state}: {proj}')
    print('\nUnbound cluster projection values:')
    for state, proj in enumerate(cluster_centers[unbound_cluster]):
        print(f'\tState {state}: {proj}')
    print('\nDifference')
    for state, proj in enumerate(cluster_centers[bound_cluster] - cluster_centers[unbound_cluster]):
        print(f'\tState {state}: {proj}')
# Side-by-side bars comparing bound vs. unbound cluster centers per state
states = np.arange(cluster_centers.shape[1])
plt.bar(states - 0.2, cluster_centers[bound_cluster], 0.4, label='Bound cluster')
plt.bar(states + 0.2, cluster_centers[unbound_cluster], 0.4, label='Unbound cluster')
plt.legend()
plt.xlabel('Output State')
plt.ylabel('Avg. Projection')
plt.show()
plt.clf()
<Figure size 640x480 with 0 Axes>
# Extract up to 50 representative frames each for the bound and unbound
# clusters as PDB trajectories (cached on disk).
BOUND_TRAJ_PATH = join(hdf5_dir, 'bound_traj.pdb')
UNBOUND_TRAJ_PATH = join(hdf5_dir, 'unbound_traj.pdb')
try:
    bound_trajectory = mdj.load_pdb(BOUND_TRAJ_PATH)
    unbound_trajectory = mdj.load_pdb(UNBOUND_TRAJ_PATH)
except FileNotFoundError:
    # Collect the (run, walker, cycle) traces of the end frames assigned to
    # the bound / unbound clusters
    bound_frames = []
    unbound_frames = []
    for i in range(len(sw)):
        f_cluster = frame_clusters[i][1]
        if f_cluster == bound_cluster:
            bound_frames.append(sw[i][1])
        elif f_cluster == unbound_cluster:
            unbound_frames.append(sw[i][1])
    try:
        we = WepyHDF5(hdf5_path,'r')
    except (IOError, AttributeError, OSError) as e:
        # Sometimes if the Python script exits prematurely, it does not reset the HDF5
        # file's flag. This manually resets it, allowing the file to be opened.
        os.system(f'h5clear -s {hdf5_path}')
        we = WepyHDF5(hdf5_path,'r')
    with we:
        # Cap each trajectory at 50 frames to keep the PDBs small
        if len(bound_frames) > 50:
            bound_frames = bound_frames[:50]
        if len(unbound_frames) > 50:
            unbound_frames = unbound_frames[:50]
        bound_trajectory = we.trace_to_mdtraj(bound_frames)
        unbound_trajectory = we.trace_to_mdtraj(unbound_frames)
        bound_fields = we.get_trace_fields(bound_frames, ['observables/aligned_images'])
        unbound_fields = we.get_trace_fields(unbound_frames, ['observables/aligned_images'])
    # Replace the raw coordinates with the re-imaged, aligned ones
    bound_trajectory.xyz = bound_fields['observables/aligned_images']
    unbound_trajectory.xyz = unbound_fields['observables/aligned_images']
    bound_trajectory.save_pdb(BOUND_TRAJ_PATH, force_overwrite=True)
    unbound_trajectory.save_pdb(UNBOUND_TRAJ_PATH, force_overwrite=True)
# Re-display the initial (bound) pose of the current system for reference
initial_pose = nglview.show_mdtraj(PDBS[sys], default_representation=False)
initial_pose.add_cartoon(RAMP_SELECTIONS[sys], color='purple') # RAMP
initial_pose.add_cartoon(CLR_SELECTIONS[sys], color='orange') # CLR
initial_pose.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red') # Peptide
initial_pose.center()
initial_pose
NGLWidget()
from nglview.contrib.movie import MovieMaker
# Animate the sampled bound-cluster frames and export them as a GIF
bound_view = nglview.show_mdtraj(bound_trajectory, default_representation=False)
bound_view.add_cartoon(RAMP_SELECTIONS[sys], color='purple') # RAMP
bound_view.add_cartoon(CLR_SELECTIONS[sys], color='orange') # CLR
bound_view.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red') # Peptide
bound_view.center()
# The GIF is written relative to the current working directory (printed below)
bound_movie = MovieMaker(bound_view, output='bound_images.gif', in_memory=True)
print(os.getcwd())
bound_movie.make()
bound_view
/dickson/s2/jim/repos/bmb-961-machine-learning-for-molecular-dynamics/Project/src
NGLWidget(max_frame=49)
# Interactive view of the sampled unbound-cluster frames
unbound_view = nglview.show_mdtraj(unbound_trajectory, default_representation=False)
unbound_view.add_cartoon(RAMP_SELECTIONS[sys], color='purple') # RAMP
unbound_view.add_cartoon(CLR_SELECTIONS[sys], color='orange') # CLR
unbound_view.add_representation('ball+stick', selection=LIGAND_SELECTIONS[sys], color='red') # Peptide
unbound_view.center()
unbound_view
NGLWidget(max_frame=49)
if True:
    # Construct the markov state model
    bmsm = BayesianMSM(lagtime=1, maxerr=0.1).fit_fetch(frame_clusters)
    # For this analysis, extremely unlikely transitions (defined as transitions that occured less than 3 times) are ignored
    counts_mat = bmsm.prior.count_model.count_matrix_full
    # NOTE(review): zeroing entries here mutates the array returned by the
    # transition_matrix property; if that is the model's internal array the
    # MSM itself is modified -- confirm.
    trim_trans_mat = bmsm.prior.transition_matrix
    trim_trans_mat[counts_mat < 3] = 0
    #Finally, calculate the committor for each state
    # NOTE(review): committor() documents A and B as collections of state
    # indices; scalar indices are passed here -- TODO confirm this is accepted.
    committors = committor(trim_trans_mat, A=bound_cluster, B=unbound_cluster)
# Find avg. change in committor for each cluster (using the final state)
cluster_avg_features = np.zeros((N_CLUSTERS, N_FEATURES))
counts = np.zeros_like(committors)
# TODO weight by frame weight
# Find avg. d_committor and avg. features for each cluster
for i in range(len(frame_clusters)):
    cluster = frame_clusters[i][1]
    cluster_avg_features[cluster] += features[i][1]
    counts[cluster] += 1
for i in range(len(counts)):
    cluster_avg_features[i] /= counts[i]
pass
from deeptime.plots import plot_markov_model
from matplotlib.colors import Normalize
from matplotlib import cm
# plot_markov_model() requires the row sums to be within a certain error tolerance.
# Rescale each row so its elements sum to one. This is for visualization purposes only.
viz_trim_trans_mat = np.copy(trim_trans_mat)
for i in range(viz_trim_trans_mat.shape[0]):
    row_sum = np.sum(viz_trim_trans_mat[i])
    # Guard against all-zero rows (possible after trimming rare transitions),
    # which would otherwise become NaN and break the plot.
    if row_sum > 0:
        viz_trim_trans_mat[i] /= row_sum
# Color each state by its weighted-average min-dist
norm = Normalize(vmin=np.min(cluster_dists), vmax=np.max(cluster_dists), clip=True)
# Pass the colormap by name: cm.get_cmap() is deprecated and removed in
# Matplotlib 3.9; ScalarMappable accepts a registered colormap name directly.
mapper = cm.ScalarMappable(norm=norm, cmap='coolwarm')
plot_markov_model(viz_trim_trans_mat, state_colors=mapper.to_rgba(cluster_dists))
[[0.35336916 0.47206866 0.8925704 1. ] [0.2526626 0.33283679 0.78366503 1. ] [0.2298057 0.29871797 0.75368315 1. ] [0.33837651 0.45281861 0.87931708 1. ] [0.36850661 0.49114119 0.90524313 1. ] [0.35841498 0.47842617 0.89679465 1. ] [0.30906032 0.41349827 0.85012763 1. ] [0.4148009 0.54687353 0.93908753 1. ] [0.70567316 0.01555616 0.15023281 1. ] [0.33349048 0.44626522 0.87445217 1. ]]
(<AxesSubplot:>, {0: array([-0.4221958 , 0.24621369]), 1: array([ 1. , -0.45517761]), 2: array([-0.38633693, -0.3983539 ]), 3: array([0.24604885, 0.02518475]), 4: array([-0.16262182, 0.30198685]), 5: array([-0.50871346, -0.1639395 ]), 6: array([ 0.76504383, -0.22023708]), 7: array([-0.24291366, -0.15079961]), 8: array([-0.12232792, 0.79307721]), 9: array([-0.16598308, 0.02204521])})