Cheminformatics/RP1/create_IFP_datasets_v2_dense.py

800 lines
67 KiB
Python

# -*- coding: utf-8 -*-
"""
Created on Wed May 26 20:27:23 2021
@author: Jonathan
Note: the "concatenation" of poses (the Conc_* columns) is not an actual concatenation; the per-pose fingerprints are simply summed (my bad).
"""
import os
import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
import oddt
import oddt.interactions
import oddt.fingerprints
import oddt.scoring.descriptors.binana
import oddt.docking.AutodockVina
import rdkit
from oddt.scoring.functions.RFScore import rfscore
from oddt.scoring.functions.NNScore import nnscore
from oddt.scoring.functions.PLECscore import PLECscore
import pickle
import re
from rdkit import Chem
import sys
import gc
import urllib.request
def find_number(text, c):
    '''Find every run of digits in ``text`` that directly follows the marker ``c``.

    :param text: text to search
    :param c: literal marker string that precedes each number (e.g. ``'.smi:'``)
    :return: list of the digit strings found, in order of appearance (empty if none)
    '''
    # Escape the marker so that regex metacharacters inside it (such as the
    # '.' in '.smi:') are matched literally instead of acting as wildcards.
    return re.findall(r'%s(\d+)' % re.escape(c), text)
# Definitions created by Olivier Béquignon (Adding them to add more information to the dataset)
def get_pocket(klifs_id):
    '''Obtain the 85 residue binding pocket structure from the specified KLIFS structure ID

    :param klifs_id: KLIFS structure ID
    :return: ODDT protein structure of the binding pocket
    '''
    url = f'https://klifs.net/api_v2/structure_get_pocket?structure_ID={klifs_id}'
    # The context manager closes the HTTP response even if reading or
    # decoding fails, so connections are not leaked across many calls.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode()
    pocket = oddt.toolkit.readstring('mol2', data)
    # Flag the molecule as a protein for the ODDT routines that consume it.
    pocket.protein = True
    return pocket
def get_ligand(klifs_id):
    '''Obtain the ligand structure from the specified KLIFS structure ID

    :param klifs_id: KLIFS structure ID
    :return: ODDT ligand structure, or None when KLIFS returns an empty response
    '''
    url = f'https://klifs.net/api_v2/structure_get_ligand?structure_ID={klifs_id}'
    # The context manager closes the HTTP response even if reading or
    # decoding fails, so connections are not leaked across many calls.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode()
    if not data:
        # Empty payload: nothing to parse, signal "no ligand" explicitly.
        return None
    return oddt.toolkit.readstring('mol2', data)
def get_pocket_IFP(klifs_id: int = None,
                   pocket: 'oddt.toolkit.Molecule' = None,
                   ligand: 'oddt.toolkit.Molecule' = None):
    '''Obtain the interaction fingerprint of the ligand and protein pocket residues.

    :param klifs_id: KLIFS structure ID (ignored if pocket and ligand provided)
    :param pocket: ODDT pocket. If None, the KLIFS pocket is used
    :param ligand: ODDT ligand. If None, the KLIFS ligand is used
    :return: Binary IFP
    :raises ValueError: if a structure has to be fetched but no KLIFS ID was
        given, or if KLIFS has no ligand for the given ID
    '''
    # A KLIFS ID is needed as soon as *either* structure has to be downloaded;
    # without this check a missing ID would be sent to KLIFS as "None".
    if klifs_id is None and (pocket is None or ligand is None):
        raise ValueError('Must at least provide a KLIFS ID if any of pocket and/or ligand is/are missing')
    if ligand is None:
        ligand = get_ligand(klifs_id)
        if ligand is None:
            # get_ligand returns None on an empty KLIFS response; fail with a
            # clear message instead of an obscure error inside ODDT.
            raise ValueError(f'No ligand available for KLIFS structure ID {klifs_id}')
    if pocket is None:
        pocket = get_pocket(klifs_id)
    return oddt.fingerprints.InteractionFingerprint(ligand, pocket)
def to_dense_fp(bits, size):
    '''Expand the string form of a sparse fingerprint into a dense list.

    :param bits: string such as ``'{1: 2, 3: 1}'``: ``index: value`` entries
        separated by ``', '`` and wrapped in one leading and one trailing character
    :param size: length of the dense fingerprint to produce
    :return: list of ``size`` ints; positions not listed in ``bits`` are 0
    '''
    body = bits[1:-1].strip()
    if not body:
        # An empty fingerprint ('{}') has no entries to parse.
        return [0] * size
    X = dict(tuple(map(int, x.split(': '))) for x in body.split(', '))
    return [X.get(i, 0) for i in range(size)]
def to_sparse_fp(X):
    '''Convert the string form of a dense fingerprint into a sparse dict.

    :param X: string such as ``'[0, 2, 0, 1]'``: comma-separated integers
        wrapped in one leading and one trailing character
    :return: dict mapping position -> value for every value greater than 0
    '''
    body = X[1:-1].strip()
    if not body:
        # An empty fingerprint ('[]') has no values to parse.
        return {}
    return {i: x for i, x in enumerate(map(int, body.split(','))) if x > 0}
# Only referenced by a commented-out loop header further down in this script.
n_in_range = 0
n_per_step = 1

# Command line: <var_from> <var_to>, used below as the bounds of the row range
# of the structure CSV that this run processes.
if len(sys.argv) < 3:
    sys.exit(f"Usage: {sys.argv[0]} <var_from> <var_to>")
var_from = sys.argv[1]
var_to = sys.argv[2]
next_from = int(var_from) + 1
print(var_from, var_to)
if var_from == var_to:
    print("var_from and var_to are the same, exiting!")
    # sys.exit() instead of the interactive-only exit() helper from `site`.
    sys.exit()

# Resume state. The script re-executes itself and writes its progress to
# config_<from>_<to>.txt as five lines:
#   file_number, count, group_number, var_from, var_to
# NOTE(review): var_from/var_to stay strings when no config is found but become
# ints when one is loaded - later comparisons should keep that in mind.
try:
    with open(f'config_{var_from}_{var_to}.txt', 'r') as f:
        config_file = f.readlines()
    # Parse everything into temporaries first so that a truncated or corrupt
    # config cannot leave the state half-updated.
    saved_file_number = int(config_file[0].replace("\n", ""))
    saved_group_number = int(config_file[2].replace("\n", ""))
    saved_var_from = int(config_file[3].replace("\n", ""))
    saved_var_to = int(config_file[4])
except (OSError, IndexError, ValueError):
    # Missing, unreadable, truncated or non-numeric config: start from scratch.
    print("No config file. Count is -1")
    file_number = 0
    count = -1
    group_number = 0
else:
    file_number = saved_file_number
    # The count stored on line 2 of the config is intentionally not read back;
    # counting restarts at -1 (presumably because the result tables are
    # rebuilt empty on every start - confirm).
    count = -1
    group_number = saved_group_number
    var_from = saved_var_from
    var_to = saved_var_to
    print("File number and count", file_number, count)
# Input tables, all read relative to the current working directory:
#   - the selected KLIFS kinase structures (one row per structure),
#   - the active compounds per kinase,
#   - the inactive compounds per kinase.
_klifs_structures_csv = '../KLIFS_kinase_structure_data_selection_subselection_np.csv'
_active_compounds_csv = '../../my_rp1_compounds_kinases/uniprot_kinase_actives/uniprot_kinase_actives.csv'
_inactive_compounds_csv = '../../my_rp1_compounds_kinases/uniprot_kinase_inactives/uniprot_kinase_inactives.csv'

csv_dataframe = pd.read_csv(_klifs_structures_csv)
csv_active_compounds = pd.read_csv(_active_compounds_csv)
csv_inactive_compounds = pd.read_csv(_inactive_compounds_csv)
def _blank_fingerprint_table(fingerprint):
    '''Build an empty result table for one fingerprint type.

    :param fingerprint: name of the fingerprint column; its summed-pose
        counterpart is stored in the matching ``Conc_<fingerprint>`` column
    :return: DataFrame with no rows and the 15 result columns, in fixed order
    '''
    table = pd.DataFrame()
    column_order = [
        "Type", fingerprint, f"Conc_{fingerprint}",
        "VinaScore", "RFv1", "RFv2", "RFv3", "nn_score", "plec_score",
        "SMILES", "pchembl_value_Mean", "pchembl_value_Median",
        "protein", "compound", "decoy_group",
    ]
    # Assigning a scalar to a frame without rows only registers the column.
    for column in column_order:
        table[column] = ''
    return table


# One (initially empty) result table per fingerprint type.
dataframe_SIFP = _blank_fingerprint_table("KLIFS_pocket_IFP")
dataframe_PLEC = _blank_fingerprint_table("PLEC")
dataframe_SPLIF_ECFP2 = _blank_fingerprint_table("SPLIF_ECFP2")
dataframe_SPLIF_ECFP4 = _blank_fingerprint_table("SPLIF_ECFP4")
dataframe_SPLIF_ECFP6 = _blank_fingerprint_table("SPLIF_ECFP6")

# Collects notes about structures that could not be loaded cleanly.
ListFaultyStructures = []
# Iterate through my csv and select for each structure the folder with actives and the folder with inactives. Then calculate fingerprints.
# for i in range(len(csv_dataframe["filename"])):
# for i in range(0+n_in_range*n_per_step, n_per_step+n_in_range*n_per_step):
for i in range(int(var_from), int(var_to)):
filename = csv_dataframe["filename"][i]
group = csv_dataframe["group"][i]
kinase_ID = csv_dataframe["kinase_ID"][i]
structure_ID = csv_dataframe["structure_ID"][i]
uniprot = csv_dataframe["uniprot"][i]
klifs_id = csv_dataframe["structure_ID"][i]
print(filename, group, kinase_ID, structure_ID, uniprot)
os.system(f'mkdir ../IFP_datasets/SIFP_v2/{filename[:-5]}')
os.system(f'mkdir ../IFP_datasets/PLEC_v2/{filename[:-5]}')
os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}')
os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}')
os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}')
try:
protein = ''
try:
os.system(f'obabel -ipdbqt ../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt -opdb -O sparse_{filename[:-5]}.pdb')
protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb'))
proteinname = f'../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt'
print("loaded in structure on 1st try")
except:
print("On except..")
ListFaultyStructures.append("Unsanitized! ",filename)
protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb', sanitize=False))
print("Unsanitized! ", filename)
print("loaded in protein succesfully!")
protein.protein = True
rf1 = rfscore.load(version=1)
with open('pickles/rf1.pickle', 'wb') as f:
pickle.dump(rf1, f)
f.close()
rf2 = rfscore.load(version=2)
with open('pickles/rf2.pickle', 'wb') as f:
pickle.dump(rf2, f)
f.close()
rf3 = rfscore.load(version=3)
with open('pickles/rf3.pickle', 'wb') as f:
pickle.dump(rf3, f)
f.close()
nn = nnscore.load()
with open('pickles/nn.pickle', 'wb') as f:
pickle.dump(nn, f)
f.close()
plecscore = PLECscore.load()
with open('pickles/plecscore.pickle', 'wb') as f:
pickle.dump(plecscore, f)
f.close()
rf1.set_protein(protein)
rf2.set_protein(protein)
rf3.set_protein(protein)
nn.set_protein(protein)
plecscore.set_protein(protein)
# KLIFS retrieving pocket for SIFP
pocket = get_pocket(klifs_id)
try:
ligand_location_actives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/actives/"
ligand_location_inactives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/inactives/"
decoy_location_1000 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys/"
decoy_location_1625 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys1625/"
run_number = 0
number_of_actives = len(os.listdir(ligand_location_actives))
number_of_inactives = len(os.listdir(ligand_location_inactives))
grouplist = ["AGC","Atypical","CAMK","CK1","CMGC","STE","TK","TKL"]
for j in sorted(os.listdir(ligand_location_actives))[file_number:]:
print("Actives ", j, number_of_actives)
print(file_number, number_of_actives)
if file_number >= number_of_actives:
pass
else:
try:
file_number += 1
run_number += 1
group_number = 0
ligandname = f'{ligand_location_actives}{j}'
# ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j))
os.system(f'obabel -ipdbqt {ligand_location_actives}{j} -osdf -O sparse_{filename[:-5]}.sdf')
ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
Type = "active"
# IFP = oddt.fingerprints.InteractionFingerprint(ligand, protein)
# SIFP = oddt.fingerprints.SimpleInteractionFingerprint(ligand,protein)
KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))
PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
Conc_PLEC = PLEC
PLEC = ''.join(str(list(PLEC)))
SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
Conc_SPLIF_ECFP2 = SPLIF_ECFP2
SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))
SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
Conc_SPLIF_ECFP4 = SPLIF_ECFP4
SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))
SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
Conc_SPLIF_ECFP6 = SPLIF_ECFP6
SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))
RFv1 = rf1.predict(ligand)
RFv2 = rf2.predict(ligand)
RFv3 = rf3.predict(ligand)
plec_score = plecscore.predict(ligand)
nn_score = nn.predict([ligand])
VinaScore = ligand.data
# This part is for concatenating all 5 poses
try:
# ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j))
ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
for l in [x for x in range(len(ligand)) if x != 0]:
KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[l])
Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
PLEC_v2 = oddt.fingerprints.PLEC(ligand[l], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
Conc_PLEC += PLEC_v2
SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=1, size=4096, distance_cutoff=4.5)
SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=2, size=4096, distance_cutoff=4.5)
SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=3, size=4096, distance_cutoff=4.5)
SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
Conc_PLEC = ''.join(str(list(Conc_PLEC)))
Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
print("Concatenated poses succesfully! Number of poses: "+str(l))
except:
print("Concat error!")
pass
#Dense to sparse for storage
# PLEC = PLEC.apply(to_sparse_fp)
# Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC)
# print(len(PLEC))
# SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096)
# Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096)
# SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096)
# Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096)
# SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096)
# Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096)
#Find and calculate SMILES
with open(ligand_location_actives+j,'r') as f:
pdbqt_file = f.read()
f.close()
number_from_file = find_number(pdbqt_file, '.smi:')
print(number_from_file)
target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi'
with open(target_smiles, 'r') as f:
smiles_file = f.readlines()
f.close()
SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","")
print(SMILES)
for q in range(len(csv_active_compounds)):
if csv_active_compounds["standardised_smiles"][q] == SMILES and csv_active_compounds["accession"][q] == uniprot:
pchembl_mean = csv_active_compounds["pchembl_value_Mean"][q]
pchembl_median = csv_active_compounds["pchembl_value_Median"][q]
break
else:
pchembl_mean = ''
pchembl_median = ''
pass
# Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2 ='','','','','','','',''
count += 1
dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
print("Done appending to dataframe! Number appended", count, int(run_number))
if int(run_number) > 375:
print("Going dark to preserve memory..")
with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
print(to_write)
f.write(to_write)
f.close()
dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
else:
pass
gc.collect()
# del(ligand,SMILES,RFv1,RFv2,RFv3,PLEC,SPLIF_ECFP2,SPLIF_ECFP4,SPLIF_ECFP6,Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,VinaScore,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2)
# Other descriptors I could use later on?
# protein_atoms, ligand_atoms, strict = oddt.interactions.hbonds(protein, ligand)
# print(protein_atoms['resname'])
# wut = oddt.interactions.close_contacts(protein_atoms, ligand_atoms, cutoff=4, x_column='coords', y_column='coords')
except:
print("Actives. Errors for some reason..")
print("Going to inactives.. ")
for k in sorted(os.listdir(ligand_location_inactives))[(file_number-number_of_actives):]:
print("Inactives ", k, number_of_inactives)
if int(file_number) >= (int(number_of_actives)+int(number_of_inactives)):
print("passing! len actives + actives is: ", str(number_of_actives), str(number_of_inactives))
pass
else:
try:
file_number += 1
run_number += 1
group_number = 0
ligandname = f'{ligand_location_inactives}{k}'
ligand = ''
Type = "inactive"
# ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k))
os.system(f'obabel -ipdbqt {ligand_location_inactives}{k} -osdf -O sparse_{filename[:-5]}.sdf')
ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))
PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
Conc_PLEC = PLEC
PLEC = ''.join(str(list(PLEC)))
SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
Conc_SPLIF_ECFP2 = SPLIF_ECFP2
SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))
SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
Conc_SPLIF_ECFP4 = SPLIF_ECFP4
SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))
SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
Conc_SPLIF_ECFP6 = SPLIF_ECFP6
SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))
RFv1 = rf1.predict(ligand)
RFv2 = rf2.predict(ligand)
RFv3 = rf3.predict(ligand)
plec_score = plecscore.predict(ligand)
nn_score = nn.predict([ligand])
VinaScore = ligand.data
# This part is for concatenating all 5 poses
try:
# ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k))
ligand = list(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
for m in [x for x in range(len(ligand)) if x != 0]:
KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[m])
Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
PLEC_v2 = oddt.fingerprints.PLEC(ligand[m], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
Conc_PLEC += PLEC_v2
SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=1, size=4096, distance_cutoff=4.5)
SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=2, size=4096, distance_cutoff=4.5)
SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=3, size=4096, distance_cutoff=4.5)
SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
Conc_PLEC = ''.join(str(list(Conc_PLEC)))
Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
print("Concatenated poses succesfully! Number of poses: "+str(m))
except:
print("Inactives. This file might not have 5 poses?")
#Find and calculate SMILES
with open(ligand_location_inactives+k,'r') as f:
pdbqt_file = f.read()
f.close()
number_from_file = find_number(pdbqt_file, '.smi:')
print(number_from_file)
target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi'
with open(target_smiles, 'r') as f:
smiles_file = f.readlines()
f.close()
SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","")
print(SMILES)
for r in range(len(csv_inactive_compounds)):
if csv_inactive_compounds["standardised_smiles"][r] == SMILES and csv_inactive_compounds["accession"][r] == uniprot:
pchembl_mean = csv_inactive_compounds["pchembl_value_Mean"][r]
pchembl_median = csv_inactive_compounds["pchembl_value_Median"][r]
break
else:
pchembl_mean = ''
pchembl_median = ''
pass
count += 1
dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
print("Done appending to dataframe! Number appended", count, int(run_number))
gc.collect()
if int(run_number) > 375:
print("Going dark to preserve memory..")
with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
print(to_write)
f.write(to_write)
f.close()
dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
else:
pass
except:
print("No pdbqt files available maybe?")
if int(count) == -1:
pass
else:
print("Going dark for the decoys..")
with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
print(to_write)
f.write(to_write)
f.close()
dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
# Decoys for loop here
decoy_number = 0
for n in range(group_number, len(grouplist)):
print("On decoys!")
# Restart guard for the decoy phase. Once at least 125 decoy files have been
# processed by this process, the resume counters and the five fingerprint
# tables are written to disk and the script re-executes itself (os.execl
# replaces the current process and does not return). Otherwise the group
# counter is advanced and processing continues.
if int(decoy_number) >= 125:
    print("Going dark to preserve memory..")
    if int(group_number) > 8:
        # group_number has passed 8: prepare a fresh config for the next index.
        next_from = int(var_from) + 1
        # Cast var_to so the comparison also holds when it is kept as a string.
        # NOTE(review): var_to is assigned outside this block - confirm its type.
        if next_from == int(var_to):
            print("DONE WITH RUN")
            # sys.exit() raises SystemExit; nothing placed after it can run.
            sys.exit()
        file_number, count, group_number = 0, -1, 0
        with open(f'config_{next_from}_{var_to}.txt', 'w') as f:
            to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to)
            print(to_write)
            f.write(to_write)
        # NOTE(review): file_number was reset just above, so these files get
        # the suffix _0 - confirm that is the intended name.
        dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
        # NOTE(review): the config above is written for next_from, yet the
        # restart is launched with var_from - confirm which index is intended.
        os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
    with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
        to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
        print(to_write)
        f.write(to_write)
    # Each table goes to its own directory with the running file_number suffix,
    # matching the other checkpoint sites in this script.
    dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
    dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
    dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
    dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
    dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
    os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
else:
    group_number += 1
# Decoys of the 1000 set: for every decoy file of group n, compute the
# fingerprints and scores and add one row to each of the five tables.
for o in sorted(os.listdir(decoy_location_1000+grouplist[n])):
    print("Decoys. Decoynumber - group_number", o, group_number)
    try:
        file_number += 1
        run_number += 1
        decoy_number += 1
        ligandname = f'{decoy_location_1000}{grouplist[n]}/{o}'
        decoy_group = str(grouplist[n])
        ligand = ''
        Type = "decoy"
        # Convert the PDBQT file to SDF with obabel, then read the first pose.
        os.system(f'obabel -ipdbqt {decoy_location_1000}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf')
        ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
        # For each fingerprint: keep the array in Conc_* as the running sum
        # over poses and store the first-pose fingerprint as a list string.
        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
        KLIFS_pocket_IFP = str(list(KLIFS_pocket_IFP))
        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
        Conc_PLEC = PLEC
        PLEC = str(list(PLEC))
        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
        SPLIF_ECFP2 = str(list(SPLIF_ECFP2))
        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
        SPLIF_ECFP4 = str(list(SPLIF_ECFP4))
        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
        SPLIF_ECFP6 = str(list(SPLIF_ECFP6))
        RFv1 = rf1.predict(ligand)
        RFv2 = rf2.predict(ligand)
        RFv3 = rf3.predict(ligand)
        plec_score = plecscore.predict(ligand)
        nn_score = nn.predict([ligand])
        VinaScore = ligand.data
        # Add the fingerprints of every further pose in the SDF onto the
        # first-pose arrays (a sum, not a concatenation).
        try:
            ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
            for p in range(1, len(ligand)):
                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[p])
                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
                PLEC_v2 = oddt.fingerprints.PLEC(ligand[p], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                Conc_PLEC += PLEC_v2
                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=1, size=4096, distance_cutoff=4.5)
                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=2, size=4096, distance_cutoff=4.5)
                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=3, size=4096, distance_cutoff=4.5)
                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
            # len(ligand) - 1 equals the last loop index and is also defined
            # when the file holds a single pose and the loop never runs.
            print("Concatenated poses succesfully! Number of poses: "+str(len(ligand) - 1))
        except Exception:
            print("Decoys. This file might not have any poses?")
        # Stringify outside the try so the summed fingerprints are stored in
        # the same list format even when a later pose failed.
        Conc_KLIFS_pocket_IFP = str(list(Conc_KLIFS_pocket_IFP))
        Conc_PLEC = str(list(Conc_PLEC))
        Conc_SPLIF_ECFP2 = str(list(Conc_SPLIF_ECFP2))
        Conc_SPLIF_ECFP4 = str(list(Conc_SPLIF_ECFP4))
        Conc_SPLIF_ECFP6 = str(list(Conc_SPLIF_ECFP6))
        # SMILES and pchembl values are not looked up for decoys; left empty.
        SMILES,pchembl_mean,pchembl_median = '','',''
        print("On dataframe!")
        # Resolve the shared cell values before advancing count, so a missing
        # Vina score cannot leave count ahead of the tables.
        shared_columns = {
            'VinaScore': VinaScore['vina_affinity'],
            'Type': Type,
            "RFv1": RFv1,
            "RFv2": RFv2,
            "RFv3": RFv3,
            "nn_score": nn_score,
            "plec_score": plec_score,
            "SMILES": SMILES,
            "pchembl_value_Mean": pchembl_mean,
            "pchembl_value_Median": pchembl_median,
            "protein": proteinname,
            "compound": ligandname,
            "decoy_group": decoy_group,
        }
        count += 1
        # Each table gets csv_dataframe.iloc[i] appended as the base row (i is
        # set by the enclosing code), then its own fingerprint pair plus the
        # shared columns are written into that row. Every assignment targets
        # the table that was just appended to.
        dataframe_SIFP = dataframe_SIFP.append(csv_dataframe.iloc[i], ignore_index=True)
        for column, value in {"KLIFS_pocket_IFP": KLIFS_pocket_IFP, "Conc_KLIFS_pocket_IFP": Conc_KLIFS_pocket_IFP, **shared_columns}.items():
            dataframe_SIFP[column][count] = value
        dataframe_PLEC = dataframe_PLEC.append(csv_dataframe.iloc[i], ignore_index=True)
        for column, value in {"PLEC": PLEC, "Conc_PLEC": Conc_PLEC, **shared_columns}.items():
            dataframe_PLEC[column][count] = value
        dataframe_SPLIF_ECFP2 = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i], ignore_index=True)
        for column, value in {"SPLIF_ECFP2": SPLIF_ECFP2, "Conc_SPLIF_ECFP2": Conc_SPLIF_ECFP2, **shared_columns}.items():
            dataframe_SPLIF_ECFP2[column][count] = value
        dataframe_SPLIF_ECFP4 = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i], ignore_index=True)
        for column, value in {"SPLIF_ECFP4": SPLIF_ECFP4, "Conc_SPLIF_ECFP4": Conc_SPLIF_ECFP4, **shared_columns}.items():
            dataframe_SPLIF_ECFP4[column][count] = value
        dataframe_SPLIF_ECFP6 = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i], ignore_index=True)
        for column, value in {"SPLIF_ECFP6": SPLIF_ECFP6, "Conc_SPLIF_ECFP6": Conc_SPLIF_ECFP6, **shared_columns}.items():
            dataframe_SPLIF_ECFP6[column][count] = value
        print("Done appending to dataframe! Number appended", count, int(run_number))
        gc.collect()
    except Exception:
        # Best effort: a decoy that cannot be processed is reported and skipped.
        print("No pdbqt files available maybe?")
# Decoys of the 1625 set, if a directory for group n exists: same processing
# as for the 1000 set, with rows tagged "decoy_v2". The file, run and decoy
# counters are not advanced for this set.
try:
    for o in sorted(os.listdir(decoy_location_1625+grouplist[n])):
        print("Decoys. Decoynumber - group_number", o, group_number)
        try:
            ligandname = f'{decoy_location_1625}{grouplist[n]}/{o}'
            decoy_group = str(grouplist[n])
            ligand = ''
            Type = "decoy_v2"
            # Convert the PDBQT file to SDF with obabel, then read the first pose.
            os.system(f'obabel -ipdbqt {decoy_location_1625}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf')
            ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
            # For each fingerprint: keep the array in Conc_* as the running sum
            # over poses and store the first-pose fingerprint as a list string.
            KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
            Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
            KLIFS_pocket_IFP = str(list(KLIFS_pocket_IFP))
            PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
            Conc_PLEC = PLEC
            PLEC = str(list(PLEC))
            SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
            SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
            Conc_SPLIF_ECFP2 = SPLIF_ECFP2
            SPLIF_ECFP2 = str(list(SPLIF_ECFP2))
            SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
            SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
            Conc_SPLIF_ECFP4 = SPLIF_ECFP4
            SPLIF_ECFP4 = str(list(SPLIF_ECFP4))
            SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
            SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
            Conc_SPLIF_ECFP6 = SPLIF_ECFP6
            SPLIF_ECFP6 = str(list(SPLIF_ECFP6))
            RFv1 = rf1.predict(ligand)
            RFv2 = rf2.predict(ligand)
            RFv3 = rf3.predict(ligand)
            plec_score = plecscore.predict(ligand)
            nn_score = nn.predict([ligand])
            VinaScore = ligand.data
            # Add the fingerprints of every further pose in the SDF onto the
            # first-pose arrays (a sum, not a concatenation).
            try:
                ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
                for p in range(1, len(ligand)):
                    KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[p])
                    Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
                    PLEC_v2 = oddt.fingerprints.PLEC(ligand[p], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                    Conc_PLEC += PLEC_v2
                    SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=1, size=4096, distance_cutoff=4.5)
                    SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
                    Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
                    SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=2, size=4096, distance_cutoff=4.5)
                    SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
                    Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
                    SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=3, size=4096, distance_cutoff=4.5)
                    SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
                    Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
                # len(ligand) - 1 equals the last loop index and is also defined
                # when the file holds a single pose and the loop never runs.
                print("Concatenated poses succesfully! Number of poses: "+str(len(ligand) - 1))
            except Exception:
                print("Decoys. This file might not have any poses?")
            # Stringify outside the try so the summed fingerprints are stored
            # in the same list format even when a later pose failed.
            Conc_KLIFS_pocket_IFP = str(list(Conc_KLIFS_pocket_IFP))
            Conc_PLEC = str(list(Conc_PLEC))
            Conc_SPLIF_ECFP2 = str(list(Conc_SPLIF_ECFP2))
            Conc_SPLIF_ECFP4 = str(list(Conc_SPLIF_ECFP4))
            Conc_SPLIF_ECFP6 = str(list(Conc_SPLIF_ECFP6))
            # SMILES and pchembl values are not looked up for decoys; left empty.
            SMILES,pchembl_mean,pchembl_median = '','',''
            print("On dataframe!")
            # Resolve the shared cell values before advancing count, so a
            # missing Vina score cannot leave count ahead of the tables.
            shared_columns = {
                'VinaScore': VinaScore['vina_affinity'],
                'Type': Type,
                "RFv1": RFv1,
                "RFv2": RFv2,
                "RFv3": RFv3,
                "nn_score": nn_score,
                "plec_score": plec_score,
                "SMILES": SMILES,
                "pchembl_value_Mean": pchembl_mean,
                "pchembl_value_Median": pchembl_median,
                "protein": proteinname,
                "compound": ligandname,
                "decoy_group": decoy_group,
            }
            count += 1
            # Each table gets csv_dataframe.iloc[i] appended as the base row (i
            # is set by the enclosing code), then its own fingerprint pair plus
            # the shared columns are written into that row. Every assignment
            # targets the table that was just appended to.
            dataframe_SIFP = dataframe_SIFP.append(csv_dataframe.iloc[i], ignore_index=True)
            for column, value in {"KLIFS_pocket_IFP": KLIFS_pocket_IFP, "Conc_KLIFS_pocket_IFP": Conc_KLIFS_pocket_IFP, **shared_columns}.items():
                dataframe_SIFP[column][count] = value
            dataframe_PLEC = dataframe_PLEC.append(csv_dataframe.iloc[i], ignore_index=True)
            for column, value in {"PLEC": PLEC, "Conc_PLEC": Conc_PLEC, **shared_columns}.items():
                dataframe_PLEC[column][count] = value
            dataframe_SPLIF_ECFP2 = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i], ignore_index=True)
            for column, value in {"SPLIF_ECFP2": SPLIF_ECFP2, "Conc_SPLIF_ECFP2": Conc_SPLIF_ECFP2, **shared_columns}.items():
                dataframe_SPLIF_ECFP2[column][count] = value
            dataframe_SPLIF_ECFP4 = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i], ignore_index=True)
            for column, value in {"SPLIF_ECFP4": SPLIF_ECFP4, "Conc_SPLIF_ECFP4": Conc_SPLIF_ECFP4, **shared_columns}.items():
                dataframe_SPLIF_ECFP4[column][count] = value
            dataframe_SPLIF_ECFP6 = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i], ignore_index=True)
            for column, value in {"SPLIF_ECFP6": SPLIF_ECFP6, "Conc_SPLIF_ECFP6": Conc_SPLIF_ECFP6, **shared_columns}.items():
                dataframe_SPLIF_ECFP6[column][count] = value
            print("Done appending to dataframe! Number appended", count, int(run_number))
        except Exception:
            # Best effort: a decoy that cannot be processed is reported and skipped.
            print("No pdbqt files available maybe?")
except Exception:
    # Reached when the directory listing itself fails, e.g. no 1625 folder.
    print("No second decoy files available")
# Once group_number has passed 8, continue under the next index.
# NOTE(review): within the visible code next_from is only assigned in the
# restart/skip branches, each of which ends in os.execl or sys.exit - confirm
# it is bound whenever this assignment runs.
if int(group_number) > 8:
var_from = next_from
else:
pass
# Checkpoint and restart once at least 125 decoy files have been processed:
# write the resume counters, dump the five tables, re-execute the script.
if int(decoy_number) >= 125:
print("Going dark to preserve memory..")
with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
# One value per line: file_number, count, group_number, var_from, var_to.
to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
print(to_write)
f.write(to_write)
# Redundant: the with-statement already closes f.
f.close()
# One CSV per fingerprint table, suffixed with the running file_number.
dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
# os.execl replaces the current process; nothing after it runs on success.
os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
else:
pass
# Handler of a try statement opened earlier in the file. A bare except also
# catches SystemExit and KeyboardInterrupt, not only ordinary errors.
except:
print("There is a time to run and a time to error out")
# ListFaultyStructures.append(filename)
# quit() raises SystemExit with this message.
# NOTE(review): if this handler is nested inside the try that the next bare
# except belongs to, that SystemExit is caught there - confirm the nesting.
quit("No valid structure to calculate on")
# Handler of a second try statement opened earlier in the file; the
# skip-to-next-structure code follows it.
except:
# Skip the current structure: record it as faulty, write a fresh config for
# the next index, dump the five fingerprint tables and restart the script on
# that index (os.execl replaces the current process and does not return).
print("Maybe haven't downloaded ", filename,"moving on to next structure!")
ListFaultyStructures.append(filename)
next_from = int(var_from) + 1
# Cast var_to so the comparison also holds when it is kept as a string.
# NOTE(review): var_to is assigned outside this block - confirm its type.
if next_from == int(var_to):
    print("DONE WITH RUN")
    # sys.exit() raises SystemExit; nothing placed after it can run.
    sys.exit()
file_number, count, group_number = 0, -1, 0
with open(f'config_{next_from}_{var_to}.txt', 'w') as f:
    # One value per line: file_number, count, group_number, next_from, var_to.
    to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to)
    print(to_write)
    f.write(to_write)
# NOTE(review): file_number was reset just above, so these files get the
# suffix _0 - confirm that is the intended name.
dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(next_from), str(var_to))
# Final status message of the script; the save calls that used to follow it
# are commented out below.
print("Done with run!")
# dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
# dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv')
# dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}.csv')
# dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}.csv')
# dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}.csv')
# with open(f'../IFP_datasets/FaultyStructures_dense_{filename[:-5]}.txt', "w") as f:
# f.writelines(ListFaultyStructures)
# f.close()
# rdkit.SimDivFilters.rdSimDivPickers.MaxMinPicker()