# -*- coding: utf-8 -*-
"""
Created on Wed May 26 20:27:23 2021

@author: Jonathan

Build per-structure interaction-fingerprint datasets (KLIFS pocket IFP, PLEC,
SPLIF) together with docking / rescoring values and write them out as CSV.

Usage: python create_IFP_datasets_v2_dense.py <var_from> <var_to>

Note on the ``Conc_*`` columns: the "concatenation" is not an actual
concatenation! It is just an element-wise sum of the per-pose fingerprints
(my bad).
"""

import os
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import oddt
import oddt.interactions
import oddt.fingerprints
import oddt.scoring.descriptors.binana
import oddt.docking.AutodockVina
import rdkit
from oddt.scoring.functions.RFScore import rfscore
from oddt.scoring.functions.NNScore import nnscore
from oddt.scoring.functions.PLECscore import PLECscore
import pickle
import re
from rdkit import Chem
import sys
import gc
import urllib.request


def find_number(text, c):
    '''Return every run of digits in ``text`` that directly follows ``c``.

    :param text: string to search
    :param c: prefix; it is interpolated into the regular expression as-is,
              so regex metacharacters in it (e.g. ``.``) keep their meaning
    :return: list of digit strings, in order of appearance
    '''
    return re.findall(r'%s(\d+)' % c, text)


# Definitions created by Olivier Béquignon (Adding them to add more information to the dataset)
def get_pocket(klifs_id):
    '''Obtain the 85 residue binding pocket structure from the specified KLIFS structure ID

    :param klifs_id: KLIFS structure ID
    :return: ODDT protein structure of the binding pocket
    '''
    url = f'https://klifs.net/api_v2/structure_get_pocket?structure_ID={klifs_id}'
    # Context manager so the HTTP connection is always released.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode()
    pocket = oddt.toolkit.readstring('mol2', data)
    pocket.protein = True
    return pocket


def get_ligand(klifs_id):
    '''Obtain the ligand structure from the specified KLIFS structure ID

    :param klifs_id: KLIFS structure ID
    :return: ODDT ligand structure, or None when KLIFS returns an empty body
    '''
    url = f'https://klifs.net/api_v2/structure_get_ligand?structure_ID={klifs_id}'
    # Context manager so the HTTP connection is always released.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode()
    if not data:
        return None
    return oddt.toolkit.readstring('mol2', data)


def get_pocket_IFP(klifs_id: int=None, pocket: oddt.toolkit.readfile=None, ligand: oddt.toolkit.readfile=None):
    '''Obtain the interaction fingerprint of the ligand and protein pocket residues.

    :param klifs_id: KLIFS structure ID (ignored if pocket and ligand provided)
    :param pocket: ODDT pocket. If None, the KLIFS pocket is used
    :param ligand: ODDT ligand. If None, the KLIFS ligand is used
    :return: Binary IFP
    :raises ValueError: if pocket and/or ligand is missing and no KLIFS ID
                        was given to fetch it with
    '''
    # A KLIFS ID is required as soon as *either* structure has to be fetched,
    # not only when all three arguments are missing.
    if klifs_id is None and (pocket is None or ligand is None):
        raise ValueError('Must at least provide a KLIFS ID if any of pocket and/or ligand is/are missing')
    if ligand is None:
        # NOTE: get_ligand returns None when KLIFS has no ligand for this ID.
        ligand = get_ligand(klifs_id)
    if pocket is None:
        pocket = get_pocket(klifs_id)
    return oddt.fingerprints.InteractionFingerprint(ligand, pocket)


def to_dense_fp(bits, size):
    '''Expand a stringified sparse fingerprint into a dense list.

    :param bits: string of the form ``"{index: value, index: value}"``
    :param size: length of the dense list to produce
    :return: list of ``size`` ints; positions absent from ``bits`` are 0
    '''
    body = bits[1:-1].strip()
    sparse = {}
    if body:  # "{}" has no entries; int('') would raise otherwise
        for entry in body.split(', '):
            index, value = map(int, entry.split(': '))
            sparse[index] = value
    return [sparse.get(i, 0) for i in range(size)]


def to_sparse_fp(X):
    '''Collapse a stringified dense fingerprint into ``{index: value}``.

    :param X: string of the form ``"[v0, v1, v2]"``
    :return: dict holding only the positions whose value is greater than 0
    '''
    body = X[1:-1].strip()
    if not body:  # "[]" has no entries; int('') would raise otherwise
        return {}
    return {i: x for i, x in enumerate(map(int, body.split(', '))) if x > 0}


def _empty_result_frame(fingerprint_column):
    '''Create an empty result DataFrame with the shared output columns.

    :param fingerprint_column: name of the fingerprint column; a matching
                               ``Conc_<name>`` column is created next to it
    :return: DataFrame with no rows and the 15 output columns, in order
    '''
    frame = pd.DataFrame()
    columns = (
        "Type", fingerprint_column, f"Conc_{fingerprint_column}",
        "VinaScore", "RFv1", "RFv2", "RFv3", "nn_score", "plec_score",
        "SMILES", "pchembl_value_Mean", "pchembl_value_Median",
        "protein", "compound", "decoy_group",
    )
    for column in columns:
        frame[column] = ''
    return frame


n_in_range = 0
n_per_step = 1

if len(sys.argv) < 3:
    sys.exit(f'Usage: {sys.argv[0]} <var_from> <var_to>')

var_from = sys.argv[1]
var_to = sys.argv[2]
next_from = int(var_from) + 1
print(var_from, var_to)
if var_from == var_to:
    print("var_from and var_to are the same, exiting!")
    sys.exit()

# Resume state written by a previous run of this script. Layout, one value per
# line: file_number, count, group_number, var_from, var_to.
try:
    with open(f'config_{var_from}_{var_to}.txt', 'r') as f:
        config_file = f.readlines()
    resumed_file_number = int(config_file[0].replace("\n", ""))
    resumed_group_number = int(config_file[2].replace("\n", ""))
    resumed_from = int(config_file[3].replace("\n", ""))
    resumed_to = int(config_file[4])
except (OSError, IndexError, ValueError):
    # Missing, truncated or unparsable config: start from scratch.
    print("No config file. Count is -1")
    file_number = 0
    count =-1
    group_number = 0
else:
    file_number = resumed_file_number
    # The stored count (second line) is deliberately not restored; count
    # restarts at -1 (presumably because the result frames start empty again).
    count = -1
    group_number = resumed_group_number
    var_from = resumed_from
    var_to = resumed_to
    print("File number and count", file_number, count)

# Load in dataset and add empty columns for the Interaction Fingerprints
csv_dataframe = pd.read_csv('../KLIFS_kinase_structure_data_selection_subselection_np.csv')
csv_active_compounds = pd.read_csv('../../my_rp1_compounds_kinases/uniprot_kinase_actives/uniprot_kinase_actives.csv')
csv_inactive_compounds = pd.read_csv('../../my_rp1_compounds_kinases/uniprot_kinase_inactives/uniprot_kinase_inactives.csv')

dataframe_SIFP = _empty_result_frame("KLIFS_pocket_IFP")
dataframe_PLEC = _empty_result_frame("PLEC")
dataframe_SPLIF_ECFP2 = _empty_result_frame("SPLIF_ECFP2")
dataframe_SPLIF_ECFP4 = _empty_result_frame("SPLIF_ECFP4")
dataframe_SPLIF_ECFP6 = _empty_result_frame("SPLIF_ECFP6")

ListFaultyStructures = []

# Iterate through my csv and select for each structure the folder with actives and the folder with inactives. Then calculate fingerprints.
# for i in range(len(csv_dataframe["filename"])): # for i in range(0+n_in_range*n_per_step, n_per_step+n_in_range*n_per_step): for i in range(int(var_from), int(var_to)): filename = csv_dataframe["filename"][i] group = csv_dataframe["group"][i] kinase_ID = csv_dataframe["kinase_ID"][i] structure_ID = csv_dataframe["structure_ID"][i] uniprot = csv_dataframe["uniprot"][i] klifs_id = csv_dataframe["structure_ID"][i] print(filename, group, kinase_ID, structure_ID, uniprot) os.system(f'mkdir ../IFP_datasets/SIFP_v2/{filename[:-5]}') os.system(f'mkdir ../IFP_datasets/PLEC_v2/{filename[:-5]}') os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}') os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}') os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}') try: protein = '' try: os.system(f'obabel -ipdbqt ../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt -opdb -O sparse_{filename[:-5]}.pdb') protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb')) proteinname = f'../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt' print("loaded in structure on 1st try") except: print("On except..") ListFaultyStructures.append("Unsanitized! ",filename) protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb', sanitize=False)) print("Unsanitized! 
", filename) print("loaded in protein succesfully!") protein.protein = True rf1 = rfscore.load(version=1) with open('pickles/rf1.pickle', 'wb') as f: pickle.dump(rf1, f) f.close() rf2 = rfscore.load(version=2) with open('pickles/rf2.pickle', 'wb') as f: pickle.dump(rf2, f) f.close() rf3 = rfscore.load(version=3) with open('pickles/rf3.pickle', 'wb') as f: pickle.dump(rf3, f) f.close() nn = nnscore.load() with open('pickles/nn.pickle', 'wb') as f: pickle.dump(nn, f) f.close() plecscore = PLECscore.load() with open('pickles/plecscore.pickle', 'wb') as f: pickle.dump(plecscore, f) f.close() rf1.set_protein(protein) rf2.set_protein(protein) rf3.set_protein(protein) nn.set_protein(protein) plecscore.set_protein(protein) # KLIFS retrieving pocket for SIFP pocket = get_pocket(klifs_id) try: ligand_location_actives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/actives/" ligand_location_inactives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/inactives/" decoy_location_1000 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys/" decoy_location_1625 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys1625/" run_number = 0 number_of_actives = len(os.listdir(ligand_location_actives)) number_of_inactives = len(os.listdir(ligand_location_inactives)) grouplist = ["AGC","Atypical","CAMK","CK1","CMGC","STE","TK","TKL"] for j in sorted(os.listdir(ligand_location_actives))[file_number:]: print("Actives ", j, number_of_actives) print(file_number, number_of_actives) if file_number >= number_of_actives: pass else: try: file_number += 1 run_number += 1 group_number = 0 ligandname = f'{ligand_location_actives}{j}' # ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j)) os.system(f'obabel -ipdbqt {ligand_location_actives}{j} -osdf -O sparse_{filename[:-5]}.sdf') ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf')) Type = "active" # IFP = oddt.fingerprints.InteractionFingerprint(ligand, protein) # SIFP = 
oddt.fingerprints.SimpleInteractionFingerprint(ligand,protein) KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand) Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP))) PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC = PLEC PLEC = ''.join(str(list(PLEC))) SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096) Conc_SPLIF_ECFP2 = SPLIF_ECFP2 SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2))) SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096) Conc_SPLIF_ECFP4 = SPLIF_ECFP4 SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4))) SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096) Conc_SPLIF_ECFP6 = SPLIF_ECFP6 SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6))) RFv1 = rf1.predict(ligand) RFv2 = rf2.predict(ligand) RFv3 = rf3.predict(ligand) plec_score = plecscore.predict(ligand) nn_score = nn.predict([ligand]) VinaScore = ligand.data # This part is for concatenating all 5 poses try: # ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j)) ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf')) for l in [x for x in range(len(ligand)) if x != 0]: KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[l]) Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2 PLEC_v2 = oddt.fingerprints.PLEC(ligand[l], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC += PLEC_v2 SPLIF_ECFP2_v2 = 
oddt.fingerprints.SPLIF(ligand[l], protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096) Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2 SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096) Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2 SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096) Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2 Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP))) Conc_PLEC = ''.join(str(list(Conc_PLEC))) Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2))) Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4))) Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6))) print("Concatenated poses succesfully! Number of poses: "+str(l)) except: print("Concat error!") pass #Dense to sparse for storage # PLEC = PLEC.apply(to_sparse_fp) # Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC) # print(len(PLEC)) # SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096) # Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096) # SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096) # Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096) # SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096) # Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096) #Find and calculate SMILES with open(ligand_location_actives+j,'r') as f: pdbqt_file = f.read() f.close() number_from_file = find_number(pdbqt_file, '.smi:') print(number_from_file) target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi' with open(target_smiles, 'r') as f: smiles_file = f.readlines() 
f.close() SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","") print(SMILES) for q in range(len(csv_active_compounds)): if csv_active_compounds["standardised_smiles"][q] == SMILES and csv_active_compounds["accession"][q] == uniprot: pchembl_mean = csv_active_compounds["pchembl_value_Mean"][q] pchembl_median = csv_active_compounds["pchembl_value_Median"][q] break else: pchembl_mean = '' pchembl_median = '' pass # Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2 ='','','','','','','','' count += 1 dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = 
dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname 
dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname print("Done appending to dataframe! Number appended", count, int(run_number)) if int(run_number) > 375: print("Going dark to preserve memory..") with open(f'config_{var_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv') dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to)) else: pass gc.collect() # 
del(ligand,SMILES,RFv1,RFv2,RFv3,PLEC,SPLIF_ECFP2,SPLIF_ECFP4,SPLIF_ECFP6,Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,VinaScore,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2) # Other descriptors I could use later on? # protein_atoms, ligand_atoms, strict = oddt.interactions.hbonds(protein, ligand) # print(protein_atoms['resname']) # wut = oddt.interactions.close_contacts(protein_atoms, ligand_atoms, cutoff=4, x_column='coords', y_column='coords') except: print("Actives. Errors for some reason..") print("Going to inactives.. ") for k in sorted(os.listdir(ligand_location_inactives))[(file_number-number_of_actives):]: print("Inactives ", k, number_of_inactives) if int(file_number) >= (int(number_of_actives)+int(number_of_inactives)): print("passing! len actives + actives is: ", str(number_of_actives), str(number_of_inactives)) pass else: try: file_number += 1 run_number += 1 group_number = 0 ligandname = f'{ligand_location_inactives}{k}' ligand = '' Type = "inactive" # ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k)) os.system(f'obabel -ipdbqt {ligand_location_inactives}{k} -osdf -O sparse_{filename[:-5]}.sdf') ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf')) KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand) Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP))) PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC = PLEC PLEC = ''.join(str(list(PLEC))) SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096) Conc_SPLIF_ECFP2 = SPLIF_ECFP2 SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2))) SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5) 
SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096) Conc_SPLIF_ECFP4 = SPLIF_ECFP4 SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4))) SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096) Conc_SPLIF_ECFP6 = SPLIF_ECFP6 SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6))) RFv1 = rf1.predict(ligand) RFv2 = rf2.predict(ligand) RFv3 = rf3.predict(ligand) plec_score = plecscore.predict(ligand) nn_score = nn.predict([ligand]) VinaScore = ligand.data # This part is for concatenating all 5 poses try: # ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k)) ligand = list(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf')) for m in [x for x in range(len(ligand)) if x != 0]: KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[m]) Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2 PLEC_v2 = oddt.fingerprints.PLEC(ligand[m], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC += PLEC_v2 SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096) Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2 SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096) Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2 SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096) Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2 Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP))) Conc_PLEC = ''.join(str(list(Conc_PLEC))) Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2))) 
Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4))) Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6))) print("Concatenated poses succesfully! Number of poses: "+str(m)) except: print("Inactives. This file might not have 5 poses?") #Find and calculate SMILES with open(ligand_location_inactives+k,'r') as f: pdbqt_file = f.read() f.close() number_from_file = find_number(pdbqt_file, '.smi:') print(number_from_file) target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi' with open(target_smiles, 'r') as f: smiles_file = f.readlines() f.close() SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","") print(SMILES) for r in range(len(csv_inactive_compounds)): if csv_inactive_compounds["standardised_smiles"][r] == SMILES and csv_inactive_compounds["accession"][r] == uniprot: pchembl_mean = csv_inactive_compounds["pchembl_value_Mean"][r] pchembl_median = csv_inactive_compounds["pchembl_value_Median"][r] break else: pchembl_mean = '' pchembl_median = '' pass count += 1 dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname 
dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname 
dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname print("Done appending to dataframe! 
Number appended", count, int(run_number)) gc.collect() if int(run_number) > 375: print("Going dark to preserve memory..") with open(f'config_{var_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv') dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to)) else: pass except: print("No pdbqt files available maybe?") if int(count) == -1: pass else: print("Going dark for the decoys..") with open(f'config_{var_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv') dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') 
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to)) # Decoys for loop here decoy_number = 0 for n in range(group_number, len(grouplist)): print("On decoys!") if int(decoy_number) >= 125: print("Going dark to preserve memory..") if int(group_number) > 8: next_from = int(var_from) + 1 if next_from == var_to: print("DONE WITH RUN") sys.exit() os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str("blub")) file_number, count, group_number = 0,-1,0 with open(f'config_{next_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to)) with open(f'config_{var_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') 
dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to)) else: group_number += 1 pass for o in sorted(os.listdir(decoy_location_1000+grouplist[n])): print("Decoys. Decoynumber - group_number", o, group_number) try: file_number += 1 run_number += 1 decoy_number += 1 ligandname = f'{decoy_location_1000}{grouplist[n]}/{o}' decoy_group = str(grouplist[n]) ligand = '' Type = "decoy" # ligand = next(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o)) os.system(f'obabel -ipdbqt {decoy_location_1000}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf') ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf')) KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand) Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP))) PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC = PLEC PLEC = ''.join(str(list(PLEC))) SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096) Conc_SPLIF_ECFP2 = SPLIF_ECFP2 SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2))) SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4 = 
oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096) Conc_SPLIF_ECFP4 = SPLIF_ECFP4 SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4))) SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096) Conc_SPLIF_ECFP6 = SPLIF_ECFP6 SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6))) RFv1 = rf1.predict(ligand) RFv2 = rf2.predict(ligand) RFv3 = rf3.predict(ligand) plec_score = plecscore.predict(ligand) nn_score = nn.predict([ligand]) VinaScore = ligand.data # This part is for concatenating all 5 poses try: # ligand = list(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o)) ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf')) for p in [x for x in range(len(ligand)) if x != 0]: KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[p]) Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2 PLEC_v2 = oddt.fingerprints.PLEC(ligand[p], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC += PLEC_v2 SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096) Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2 SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096) Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2 SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096) Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2 Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP))) Conc_PLEC = ''.join(str(list(Conc_PLEC))) Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2))) 
Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4))) Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6))) print("Concatenated poses succesfully! Number of poses: "+str(p)) except: print("Decoys. This file might not have any poses?") #Dense to sparse for storage # PLEC = oddt.fingerprints.dense_to_sparse(PLEC, size=16384) # Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC, size=16384) # SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096) # Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096) # SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096) # Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096) # SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096) # Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096) # This doesn't (yet) work for the decoys! # with open(decoy_location_1000+grouplist[n]+j,'r') as f: # pdbqt_file = f.read() # f.close() # number_from_file = find_number(pdbqt_file, '.smi:') # target_smiles = f'../my_rp1_compounds_kinases/{Type}s/1000/{group}_125.smi' # with open(target_smiles, 'r') as f: # smiles_file = f.readlines() # f.close() # SMILES = str(smiles_file[int(number_from_file[0])]).replace("\n","") SMILES,pchembl_mean,pchembl_median = '','','' print("On dataframe!") count += 1 dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count],dataframe_SIFP["decoy_group"][count] = 
dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count],dataframe_PLEC["decoy_group"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group 
dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group print("Done appending to dataframe! 
Number appended", count, int(run_number)) gc.collect() except: print("No pdbqt files available maybe?") # Decoys of 1625, IF available try: for o in sorted(os.listdir(decoy_location_1625+grouplist[n])): print("Decoys. Decoynumber - group_number", o, group_number) try: # file_number += 1 # run_number += 1 # decoy_number += 1 ligandname = f'{decoy_location_1625}{grouplist[n]}/{o}' decoy_group = str(grouplist[n]) ligand = '' Type = "decoy_v2" os.system(f'obabel -ipdbqt {decoy_location_1625}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf') ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf')) KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand) Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP))) PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC = PLEC PLEC = ''.join(str(list(PLEC))) SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096) Conc_SPLIF_ECFP2 = SPLIF_ECFP2 SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2))) SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096) Conc_SPLIF_ECFP4 = SPLIF_ECFP4 SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4))) SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096) Conc_SPLIF_ECFP6 = SPLIF_ECFP6 SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6))) RFv1 = rf1.predict(ligand) RFv2 = rf2.predict(ligand) RFv3 = rf3.predict(ligand) plec_score = plecscore.predict(ligand) nn_score = nn.predict([ligand]) VinaScore = ligand.data # This part is for concatenating all 5 poses try: 
# ligand = list(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o)) ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf')) for p in [x for x in range(len(ligand)) if x != 0]: KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[p]) Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2 PLEC_v2 = oddt.fingerprints.PLEC(ligand[p], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True) Conc_PLEC += PLEC_v2 SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=1, size=4096, distance_cutoff=4.5) SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096) Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2 SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=2, size=4096, distance_cutoff=4.5) SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096) Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2 SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=3, size=4096, distance_cutoff=4.5) SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096) Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2 Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP))) Conc_PLEC = ''.join(str(list(Conc_PLEC))) Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2))) Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4))) Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6))) print("Concatenated poses succesfully! Number of poses: "+str(p)) except: print("Decoys. 
This file might not have any poses?") SMILES,pchembl_mean,pchembl_median = '','','' print("On dataframe!") count += 1 dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count],dataframe_SIFP["decoy_group"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count],dataframe_PLEC["decoy_group"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group 
# Append the template row to the SPLIF ECFP2 table and fill the row labelled
# `count` (presumably the row just appended -- confirm against how `count` is
# initialised). The first target rebinds the frame, so the column writes that
# follow go to the new frame.
(
    dataframe_SPLIF_ECFP2,
    dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],
    dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],
    dataframe_SPLIF_ECFP2['VinaScore'][count],
    dataframe_SPLIF_ECFP2['Type'][count],
    dataframe_SPLIF_ECFP2["RFv1"][count],
    dataframe_SPLIF_ECFP2["RFv2"][count],
    dataframe_SPLIF_ECFP2["RFv3"][count],
    dataframe_SPLIF_ECFP2["nn_score"][count],
    dataframe_SPLIF_ECFP2["plec_score"][count],
    dataframe_SPLIF_ECFP2["SMILES"][count],
    dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],
    dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],
    dataframe_SPLIF_ECFP2["protein"][count],
    dataframe_SPLIF_ECFP2["compound"][count],
    dataframe_SPLIF_ECFP2["decoy_group"][count],
) = (
    dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i], ignore_index=True),
    SPLIF_ECFP2,
    Conc_SPLIF_ECFP2,
    VinaScore['vina_affinity'],
    Type,
    RFv1,
    RFv2,
    RFv3,
    nn_score,
    plec_score,
    SMILES,
    pchembl_mean,
    pchembl_median,
    proteinname,
    ligandname,
    decoy_group,
)
# Same row fill for the SPLIF ECFP4 table.
# Fix: "plec_score" and "decoy_group" were written to dataframe_SPLIF_ECFP2
# (copy-paste slip), leaving both columns unfilled in the ECFP4 table.
(
    dataframe_SPLIF_ECFP4,
    dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],
    dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],
    dataframe_SPLIF_ECFP4['VinaScore'][count],
    dataframe_SPLIF_ECFP4['Type'][count],
    dataframe_SPLIF_ECFP4["RFv1"][count],
    dataframe_SPLIF_ECFP4["RFv2"][count],
    dataframe_SPLIF_ECFP4["RFv3"][count],
    dataframe_SPLIF_ECFP4["nn_score"][count],
    dataframe_SPLIF_ECFP4["plec_score"][count],
    dataframe_SPLIF_ECFP4["SMILES"][count],
    dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],
    dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],
    dataframe_SPLIF_ECFP4["protein"][count],
    dataframe_SPLIF_ECFP4["compound"][count],
    dataframe_SPLIF_ECFP4["decoy_group"][count],
) = (
    dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i], ignore_index=True),
    SPLIF_ECFP4,
    Conc_SPLIF_ECFP4,
    VinaScore['vina_affinity'],
    Type,
    RFv1,
    RFv2,
    RFv3,
    nn_score,
    plec_score,
    SMILES,
    pchembl_mean,
    pchembl_median,
    proteinname,
    ligandname,
    decoy_group,
)
dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group print("Done appending to dataframe! Number appended", count, int(run_number)) except: print("No pdbqt files available maybe?") except: print("No second decoy files available") if int(group_number) > 8: var_from = next_from else: pass if int(decoy_number) >= 125: print("Going dark to preserve memory..") with open(f'config_{var_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv') dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') 
dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to)) else: pass except: print("There is a time to run and a time to error out") # ListFaultyStructures.append(filename) quit("No valid structure to calculate on") except: print("Maybe haven't downloaded ", filename,"moving on to next structure!") ListFaultyStructures.append(filename) next_from = int(var_from) + 1 if next_from == var_to: print("DONE WITH RUN") sys.exit() os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str("blub")) file_number, count, group_number = 0,-1,0 with open(f'config_{next_from}_{var_to}.txt', 'w') as f: to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to) print(to_write) f.write(to_write) f.close() dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv') dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv') dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv') os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(next_from), str(var_to)) print("Done with run!") # dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv') # dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv') # 
dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}.csv') # dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}.csv') # dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}.csv') # with open(f'../IFP_datasets/FaultyStructures_dense_{filename[:-5]}.txt', "w") as f: # f.writelines(ListFaultyStructures) # f.close() # rdkit.SimDivFilters.rdSimDivPickers.MaxMinPicker()