From 731466dc9a786a1a24e951530d88f29516bd3fed Mon Sep 17 00:00:00 2001
From: Jonathan Herrewijnen <jonathan.herrewijnen@gmail.com>
Date: Mon, 31 Oct 2022 14:42:35 +0100
Subject: [PATCH] IFP script added, others still required.

---
 create_IFP_datasets_v2_dense.py | 799 ++++++++++++++++++++++++++++++++
 1 file changed, 799 insertions(+)
 create mode 100644 create_IFP_datasets_v2_dense.py

diff --git a/create_IFP_datasets_v2_dense.py b/create_IFP_datasets_v2_dense.py
new file mode 100644
index 0000000..141d0c3
--- /dev/null
+++ b/create_IFP_datasets_v2_dense.py
@@ -0,0 +1,799 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed May 26 20:27:23 2021
+
+@author: Jonathan
+
+The "concatenation" is not an actual concatenation: the fingerprints of the poses are simply summed (my bad).
+"""
+import os
+import pandas as pd
+pd.options.mode.chained_assignment = None  # default='warn'
+import oddt
+import oddt.interactions
+import oddt.fingerprints
+import oddt.scoring.descriptors.binana
+import oddt.docking.AutodockVina
+import rdkit
+from oddt.scoring.functions.RFScore import rfscore
+from oddt.scoring.functions.NNScore import nnscore
+from oddt.scoring.functions.PLECscore import PLECscore
+import pickle
+import re
+from rdkit import Chem
+import sys
+import gc
+import urllib.request
+
+def find_number(text, c):  # return every digit run (as a str) that directly follows the literal prefix c in text
+    return re.findall(r'%s(\d+)' % re.escape(c), text)  # escape c so e.g. the '.' in '.smi:' only matches a real dot
+
+# Definitions created by Olivier Béquignon (Adding them to add more information to the dataset)
+def get_pocket(klifs_id):
+    '''Obtain the 85 residue binding pocket structure from the specified KLIFS structure ID
+
+    :param klifs_id: KLIFS structure ID, interpolated into the KLIFS API request URL
+    :return: ODDT protein structure of the binding pocket (mol2 response parsed, ``protein`` flag set)
+    '''
+    # The with block closes the HTTP response even if reading, decoding or parsing raises.
+    with urllib.request.urlopen(f'https://klifs.net/api_v2/structure_get_pocket?structure_ID={klifs_id}') as response:
+        pocket = oddt.toolkit.readstring('mol2', response.read().decode())
+    pocket.protein = True
+    return pocket
+
+def get_ligand(klifs_id):
+    '''Obtain the ligand structure from the specified KLIFS structure ID
+
+    :param klifs_id: KLIFS structure ID, interpolated into the KLIFS API request URL
+    :return: ODDT ligand structure parsed from the mol2 response, or None when the response body is empty
+    '''
+    with urllib.request.urlopen(f'https://klifs.net/api_v2/structure_get_ligand?structure_ID={klifs_id}') as response:
+        data = response.read().decode()  # the response is closed on leaving the with block
+    # An empty body is reported as None instead of being handed to the mol2 parser.
+    return oddt.toolkit.readstring('mol2', data) if data else None
+    
+def get_pocket_IFP(klifs_id: int=None,
+                   pocket: oddt.toolkit.readfile=None,
+                   ligand: oddt.toolkit.readfile=None):
+    '''Obtain the interaction fingerprint of the ligand and protein pocket residues.
+
+    :param klifs_id: KLIFS structure ID; only used to fetch whichever of pocket/ligand is not provided
+    :param pocket: ODDT pocket. If None, the KLIFS pocket is used
+    :param ligand: ODDT ligand. If None, the KLIFS ligand is used
+
+    :return: result of oddt.fingerprints.InteractionFingerprint(ligand, pocket)
+    :raises ValueError: if klifs_id is None while pocket and/or ligand still has to be fetched
+    '''
+    # Any missing structure requires the ID; checking "all three are None" let a None ID reach the KLIFS request.
+    if klifs_id is None and (pocket is None or ligand is None):
+        raise ValueError('Must at least provide a KLIFS ID if any of pocket and/or ligand is/are missing')
+    ligand = get_ligand(klifs_id) if ligand is None else ligand
+    pocket = get_pocket(klifs_id) if pocket is None else pocket
+    return oddt.fingerprints.InteractionFingerprint(ligand, pocket)
+
+def to_dense_fp(bits, size):  # expand a '{index: count, ...}' string into a list of `size` ints, 0 where absent
+    X = dict(map(int, x.split(': ')) for x in bits[1:-1].split(', ') if x)  # `if x` skips the '' left by an empty '{}'
+    return [X.get(i, 0) for i in range(size)]
+
+to_sparse_fp = lambda X: {i: int(x) for i, x in enumerate(X[1:-1].split(', ')) if x and int(x) > 0}  # '[0, 2]' -> {1: 2}; '[]' -> {}
+
+n_in_range = 0  # in the visible code only referenced by the commented-out alternative range of the main loop
+n_per_step = 1  # (same as n_in_range)
+var_from = sys.argv[1]  # first row index of csv_dataframe to process; stays a str until int() is applied
+var_to = sys.argv[2]  # end row index, exclusive (used as the stop of range() in the main loop)
+next_from = int(var_from) + 1
+
+print(var_from, var_to)
+if var_from == var_to:  # identical bounds give an empty range, so there is nothing to do
+    print("var_from and var_to are the same, exiting!")
+    sys.exit()  # sys.exit() rather than exit(): the latter is injected by the site module for interactive use
+
+try:  # resume state that a previous run wrote before it re-executed this script
+    with open(f'config_{var_from}_{var_to}.txt', 'r') as f:
+        config_file = f.readlines()
+    # Line 0 = file_number, line 2 = group_number, lines 3-4 = var_from/var_to; line 1 (count) is not restored.
+    file_number = int(config_file[0].replace("\n",""))
+    count = -1
+    group_number = int(config_file[2].replace("\n",""))
+    var_from = int(config_file[3].replace("\n",""))
+    var_to = int(config_file[4])
+    print("File number and count", file_number, count)
+except (OSError, IndexError, ValueError):  # missing/unreadable file, too few lines, or non-integer content
+    print("No config file. Count is -1")
+    file_number = 0
+    count =-1
+    group_number = 0
+
+# Load the input tables; the (initially empty) fingerprint result tables are created right below.
+csv_dataframe = pd.read_csv('../KLIFS_kinase_structure_data_selection_subselection_np.csv')  # structure table; the main loop reads "filename", "group", "kinase_ID", "structure_ID", "uniprot" by row index
+csv_active_compounds = pd.read_csv('../../my_rp1_compounds_kinases/uniprot_kinase_actives/uniprot_kinase_actives.csv')  # searched by "standardised_smiles" + "accession" for the pchembl values of actives
+csv_inactive_compounds = pd.read_csv('../../my_rp1_compounds_kinases/uniprot_kinase_inactives/uniprot_kinase_inactives.csv')  # same lookup, for inactives
+
+def _empty_result_frame(fingerprint):
+    """Return a row-less DataFrame with the shared result columns for the given fingerprint column name."""
+    frame = pd.DataFrame()
+    for column in ("Type", fingerprint, "Conc_" + fingerprint, "VinaScore", "RFv1", "RFv2", "RFv3", "nn_score",
+                   "plec_score", "SMILES", "pchembl_value_Mean", "pchembl_value_Median", "protein", "compound", "decoy_group"):
+        frame[column] = ''  # '' placeholder; the frame has no rows yet
+    return frame
+
+# One result table per fingerprint type, each with a plain and a "Conc_" (summed over poses) fingerprint column.
+dataframe_SIFP = _empty_result_frame("KLIFS_pocket_IFP")
+dataframe_PLEC = _empty_result_frame("PLEC")
+dataframe_SPLIF_ECFP2 = _empty_result_frame("SPLIF_ECFP2")
+dataframe_SPLIF_ECFP4 = _empty_result_frame("SPLIF_ECFP4")
+dataframe_SPLIF_ECFP6 = _empty_result_frame("SPLIF_ECFP6")
+
+ListFaultyStructures = []
+
+# Iterate through my csv and select for each structure the folder with actives and the folder with inactives. Then calculate fingerprints.
+# for i in range(len(csv_dataframe["filename"])):
+# for i in range(0+n_in_range*n_per_step, n_per_step+n_in_range*n_per_step):
+for i in range(int(var_from), int(var_to)):
+    filename = csv_dataframe["filename"][i]
+    group = csv_dataframe["group"][i]
+    kinase_ID = csv_dataframe["kinase_ID"][i]
+    structure_ID = csv_dataframe["structure_ID"][i]
+    uniprot = csv_dataframe["uniprot"][i]
+    klifs_id = csv_dataframe["structure_ID"][i]
+    print(filename, group, kinase_ID, structure_ID, uniprot)
+    os.system(f'mkdir ../IFP_datasets/SIFP_v2/{filename[:-5]}')
+    os.system(f'mkdir ../IFP_datasets/PLEC_v2/{filename[:-5]}')
+    os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}')
+    os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}')
+    os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}')
+    
+    try:
+        protein = ''
+        try:
+            os.system(f'obabel -ipdbqt ../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt -opdb -O sparse_{filename[:-5]}.pdb')
+            protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb'))
+            proteinname = f'../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt'
+            print("loaded in structure on 1st try")
+        except:
+            print("On except..")
+            ListFaultyStructures.append("Unsanitized! ",filename)
+            protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb', sanitize=False))
+            print("Unsanitized! ", filename)
+        print("loaded in protein succesfully!")
+        protein.protein = True
+        
+        rf1 = rfscore.load(version=1)
+        with open('pickles/rf1.pickle', 'wb') as f:
+            pickle.dump(rf1, f)
+        f.close()
+        rf2 = rfscore.load(version=2)
+        with open('pickles/rf2.pickle', 'wb') as f:
+            pickle.dump(rf2, f)
+        f.close()    
+        rf3 = rfscore.load(version=3)
+        with open('pickles/rf3.pickle', 'wb') as f:
+            pickle.dump(rf3, f)
+        f.close()
+        nn = nnscore.load()
+        with open('pickles/nn.pickle', 'wb') as f:
+            pickle.dump(nn, f)
+        f.close()
+        plecscore = PLECscore.load()
+        with open('pickles/plecscore.pickle', 'wb') as f:
+            pickle.dump(plecscore, f)
+        f.close()
+        
+        rf1.set_protein(protein)
+        rf2.set_protein(protein)
+        rf3.set_protein(protein)
+        nn.set_protein(protein)
+        plecscore.set_protein(protein)
+        
+        # KLIFS retrieving pocket for SIFP
+        pocket = get_pocket(klifs_id)
+    
+        try:
+            ligand_location_actives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/actives/"
+            ligand_location_inactives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/inactives/"
+            decoy_location_1000 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys/"
+            decoy_location_1625 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys1625/"
+            run_number = 0
+            number_of_actives = len(os.listdir(ligand_location_actives))
+            number_of_inactives = len(os.listdir(ligand_location_inactives))
+            grouplist = ["AGC","Atypical","CAMK","CK1","CMGC","STE","TK","TKL"]
+
+            for j in sorted(os.listdir(ligand_location_actives))[file_number:]:
+                print("Actives ", j, number_of_actives)
+                print(file_number, number_of_actives)
+                if file_number >= number_of_actives:
+                    pass
+                else:
+                    try:
+                        file_number += 1
+                        run_number += 1
+                        group_number = 0
+                        ligandname = f'{ligand_location_actives}{j}'
+                        
+                        # ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j))
+                        os.system(f'obabel -ipdbqt {ligand_location_actives}{j} -osdf -O sparse_{filename[:-5]}.sdf')
+                        ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
+                        Type = "active"
+                        
+                        # IFP = oddt.fingerprints.InteractionFingerprint(ligand, protein)
+                        # SIFP = oddt.fingerprints.SimpleInteractionFingerprint(ligand,protein)
+                        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
+                        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
+                        KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))
+                        
+                        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                        Conc_PLEC = PLEC
+                        PLEC = ''.join(str(list(PLEC)))
+
+                        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
+                        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
+                        SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))
+
+                        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
+                        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
+                        SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))
+                        
+                        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
+                        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
+                        SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))
+                        
+                        RFv1 = rf1.predict(ligand)
+                        RFv2 = rf2.predict(ligand)
+                        RFv3 = rf3.predict(ligand)
+                        plec_score = plecscore.predict(ligand)
+                        nn_score = nn.predict([ligand])
+                        VinaScore = ligand.data
+                        
+                        # Add the fingerprints of the remaining poses onto the first pose's (a sum, not a true concatenation)
+                        try:
+                            # ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j))
+                            ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
+                            for l in [x for x in range(len(ligand)) if x != 0]:
+                                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[l])
+                                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
+
+                                PLEC_v2 = oddt.fingerprints.PLEC(ligand[l], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                                Conc_PLEC += PLEC_v2
+
+                                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=1, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
+                                
+                                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=2, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
+                                
+                                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=3, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
+                            Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
+                            Conc_PLEC = ''.join(str(list(Conc_PLEC)))
+                            Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
+                            Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
+                            Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
+                            print("Concatenated poses succesfully! Number of poses: "+str(l))
+                        except:
+                            print("Concat error!") 
+                            pass
+                        
+                        #Dense to sparse for storage
+                        # PLEC = PLEC.apply(to_sparse_fp)
+                        # Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC)
+                        # print(len(PLEC))
+                        # SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096)
+                        # Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096)
+                        # SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096)
+                        # Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096)
+                        # SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096)
+                        # Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096)
+                        
+                        #Find and calculate SMILES
+                        with open(ligand_location_actives+j,'r') as f:
+                            pdbqt_file = f.read()
+                        f.close()
+                        number_from_file = find_number(pdbqt_file, '.smi:')
+                        print(number_from_file)
+                        target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi'
+                        with open(target_smiles, 'r') as f:
+                            smiles_file = f.readlines()
+                        f.close()
+                        SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","")
+                        print(SMILES)
+                        
+                        for q in range(len(csv_active_compounds)):
+                            if csv_active_compounds["standardised_smiles"][q] == SMILES and csv_active_compounds["accession"][q] == uniprot:
+                                pchembl_mean = csv_active_compounds["pchembl_value_Mean"][q]
+                                pchembl_median = csv_active_compounds["pchembl_value_Median"][q]
+                                break
+                            else:
+                                pchembl_mean = ''
+                                pchembl_median = ''
+                                pass
+                        
+                        # Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2 ='','','','','','','',''
+                        
+                        count += 1
+                        dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        print("Done appending to dataframe! Number appended", count, int(run_number))
+                        
+                        if int(run_number) > 375:
+                            print("Going dark to preserve memory..")
+                            with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
+                                to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
+                                print(to_write)
+                                f.write(to_write)
+                            f.close()
+                            dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+                            os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
+                        else:
+                            pass
+                        gc.collect()
+                        
+                        # del(ligand,SMILES,RFv1,RFv2,RFv3,PLEC,SPLIF_ECFP2,SPLIF_ECFP4,SPLIF_ECFP6,Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,VinaScore,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2)
+
+                        # Other descriptors I could use later on?
+                        # protein_atoms, ligand_atoms, strict = oddt.interactions.hbonds(protein, ligand)
+                        # print(protein_atoms['resname'])
+                        # wut = oddt.interactions.close_contacts(protein_atoms, ligand_atoms, cutoff=4, x_column='coords', y_column='coords')
+                    except:
+                        print("Actives. Errors for some reason..")
+            print("Going to inactives.. ")
+            for k in sorted(os.listdir(ligand_location_inactives))[(file_number-number_of_actives):]:
+                print("Inactives ", k, number_of_inactives)
+                if int(file_number) >= (int(number_of_actives)+int(number_of_inactives)):
+                    print("passing! len actives + actives is: ", str(number_of_actives), str(number_of_inactives))
+                    pass
+                else:
+                    try:
+                        file_number += 1
+                        run_number += 1
+                        group_number = 0
+                        ligandname = f'{ligand_location_inactives}{k}'
+
+                        ligand = ''
+                        Type = "inactive"
+                        # ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k))
+                        os.system(f'obabel -ipdbqt {ligand_location_inactives}{k} -osdf -O sparse_{filename[:-5]}.sdf')
+                        ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
+                        
+                        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
+                        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
+                        KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))
+                        
+                        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                        Conc_PLEC = PLEC
+                        PLEC = ''.join(str(list(PLEC)))
+                        
+                        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
+                        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
+                        SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))
+                        
+                        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
+                        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
+                        SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))
+                        
+                        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
+                        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
+                        SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))
+                        
+                        RFv1 = rf1.predict(ligand)
+                        RFv2 = rf2.predict(ligand)
+                        RFv3 = rf3.predict(ligand)
+                        plec_score = plecscore.predict(ligand)
+                        nn_score = nn.predict([ligand])
+                        VinaScore = ligand.data
+                        
+                        # Add the fingerprints of the remaining poses onto the first pose's (a sum, not a true concatenation)
+                        try:
+                            # ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k))
+                            ligand = list(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
+                            for m in [x for x in range(len(ligand)) if x != 0]:
+                                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[m])
+                                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
+
+                                PLEC_v2 = oddt.fingerprints.PLEC(ligand[m], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                                Conc_PLEC += PLEC_v2
+
+                                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=1, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
+                                
+                                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=2, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
+                                
+                                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=3, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
+                            Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
+                            Conc_PLEC = ''.join(str(list(Conc_PLEC)))
+                            Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
+                            Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
+                            Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
+                            print("Concatenated poses succesfully! Number of poses: "+str(m))
+                        except:
+                            print("Inactives. This file might not have 5 poses?")
+                        
+                        #Find and calculate SMILES
+                        with open(ligand_location_inactives+k,'r') as f:
+                            pdbqt_file = f.read()
+                        f.close()
+                        number_from_file = find_number(pdbqt_file, '.smi:')
+                        print(number_from_file)
+                        target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi'
+                        with open(target_smiles, 'r') as f:
+                            smiles_file = f.readlines()
+                        f.close()
+                        SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","")
+                        print(SMILES)
+                        
+                        for r in range(len(csv_inactive_compounds)):
+                            if csv_inactive_compounds["standardised_smiles"][r] == SMILES and csv_inactive_compounds["accession"][r] == uniprot:
+                                pchembl_mean = csv_inactive_compounds["pchembl_value_Mean"][r]
+                                pchembl_median = csv_inactive_compounds["pchembl_value_Median"][r]
+                                break
+                            else:
+                                pchembl_mean = ''
+                                pchembl_median = ''
+                                pass
+                        
+                        count += 1
+                        dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
+                        print("Done appending to dataframe! Number appended", count, int(run_number))
+                        
+                        gc.collect()
+                        if int(run_number) > 375:
+                            print("Going dark to preserve memory..")
+                            with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
+                                to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
+                                print(to_write)
+                                f.write(to_write)
+                            f.close()
+                            dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+                            dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+                            os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
+                        else:
+                            pass
+                        
+                    except:
+                        print("No pdbqt files available maybe?")
+                        
+            if int(count) == -1:
+                pass
+            else:
+                print("Going dark for the decoys..")
+                with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
+                    to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
+                    print(to_write)
+                    f.write(to_write)
+                f.close()
+                dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
+                dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+                dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+                dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+                dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+                os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
+            
+            # Decoys for loop here
+            decoy_number = 0
+            for n in range(group_number, len(grouplist)):
+                print("On decoys!")
+                
+                if int(decoy_number) >= 125:
+                    print("Going dark to preserve memory..")
+                    if int(group_number) > 8:
+                        next_from = int(var_from) + 1
+                        if next_from == var_to:
+                            print("DONE WITH RUN")
+                            sys.exit()
+                            os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str("blub"))
+                            file_number, count, group_number = 0,-1,0
+                            with open(f'config_{next_from}_{var_to}.txt', 'w') as f:
+                                to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to)
+                                print(to_write)
+                                f.write(to_write)
+                            f.close()
+                        dataframe_SIFP.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+                        dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv')
+                        dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+                        dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+                        dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+                        os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
+                    with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
+                        to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
+                        print(to_write)
+                        f.write(to_write)
+                    f.close()
+                    dataframe_SIFP.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv')
+                    dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+                    os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
+                else:
+                    group_number += 1
+                    pass
+                
+                for o in sorted(os.listdir(decoy_location_1000+grouplist[n])):
+                    print("Decoys. Decoynumber - group_number", o, group_number)
+                    try:
+                        file_number += 1
+                        run_number += 1
+                        decoy_number += 1
+                        ligandname = f'{decoy_location_1000}{grouplist[n]}/{o}'
+                        decoy_group = str(grouplist[n])
+
+                        ligand = ''
+                        Type = "decoy"
+                        # ligand = next(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o))
+                        os.system(f'obabel -ipdbqt {decoy_location_1000}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf')
+                        ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
+                        
+                        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
+                        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
+                        KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))
+                        
+                        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                        Conc_PLEC = PLEC
+                        PLEC = ''.join(str(list(PLEC)))
+                        
+                        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
+                        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
+                        SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))
+                        
+                        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
+                        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
+                        SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))
+                        
+                        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
+                        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
+                        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
+                        SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))
+                        
+                        RFv1 = rf1.predict(ligand)
+                        RFv2 = rf2.predict(ligand)
+                        RFv3 = rf3.predict(ligand)
+                        plec_score = plecscore.predict(ligand)
+                        nn_score = nn.predict([ligand])
+                        VinaScore = ligand.data
+                        
+                        # This part is for concatenating all 5 poses
+                        try:
+                            # ligand = list(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o))
+                            ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
+                            for p in [x for x in range(len(ligand)) if x != 0]:
+                                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[p])
+                                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
+
+                                PLEC_v2 = oddt.fingerprints.PLEC(ligand[p], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                                Conc_PLEC += PLEC_v2
+
+                                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=1, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
+                                
+                                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=2, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
+                                
+                                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=3, size=4096, distance_cutoff=4.5)
+                                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
+                                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
+                            Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
+                            Conc_PLEC = ''.join(str(list(Conc_PLEC)))
+                            Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
+                            Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
+                            Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
+                            print("Concatenated poses succesfully! Number of poses: "+str(p))
+                            
+                        except:
+                            print("Decoys. This file might not have any poses?")
+                        
+                        # Disabled: dense-to-sparse conversion of the fingerprints for storage.
+                        # PLEC = oddt.fingerprints.dense_to_sparse(PLEC, size=16384)
+                        # Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC, size=16384)
+                        # SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096)
+                        # Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096)
+                        # SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096)
+                        # Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096)
+                        # SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096)
+                        # Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096)
+                        
+                        # Disabled: the SMILES lookup below does not (yet) work for the decoys, so SMILES and the pChEMBL values are set to '' instead.
+                        # with open(decoy_location_1000+grouplist[n]+j,'r') as f:
+                        #     pdbqt_file = f.read()
+                        # f.close()
+                        # number_from_file = find_number(pdbqt_file, '.smi:')
+                        # target_smiles = f'../my_rp1_compounds_kinases/{Type}s/1000/{group}_125.smi'
+                        # with open(target_smiles, 'r') as f:
+                        #     smiles_file = f.readlines()
+                        # f.close()
+                        # SMILES = str(smiles_file[int(number_from_file[0])]).replace("\n","")
+                        SMILES,pchembl_mean,pchembl_median = '','',''
+                        print("On dataframe!")
+                        
+                        count += 1
+                        dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count],dataframe_SIFP["decoy_group"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                        dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count],dataframe_PLEC["decoy_group"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                        dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                        dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                        dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                        print("Done appending to dataframe! Number appended", count, int(run_number))
+                        
+                        gc.collect()
+                    except:
+                        print("No pdbqt files available maybe?")
+
+                # Decoys of 1625, IF available
+                try:
+                    for o in sorted(os.listdir(decoy_location_1625+grouplist[n])):
+                        print("Decoys. Decoynumber - group_number", o, group_number)
+                        try:
+                            # file_number += 1
+                            # run_number += 1
+                            # decoy_number += 1
+                            ligandname = f'{decoy_location_1625}{grouplist[n]}/{o}'
+                            decoy_group = str(grouplist[n])
+
+                            ligand = ''
+                            Type = "decoy_v2"
+                            os.system(f'obabel -ipdbqt {decoy_location_1625}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf')
+                            ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
+                            
+                            KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
+                            Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
+                            KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))
+                            
+                            PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                            Conc_PLEC = PLEC
+                            PLEC = ''.join(str(list(PLEC)))
+                            
+                            SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
+                            SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
+                            Conc_SPLIF_ECFP2 = SPLIF_ECFP2
+                            SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))
+                            
+                            SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
+                            SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
+                            Conc_SPLIF_ECFP4 = SPLIF_ECFP4
+                            SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))
+                            
+                            SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
+                            SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
+                            Conc_SPLIF_ECFP6 = SPLIF_ECFP6
+                            SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))
+                            
+                            RFv1 = rf1.predict(ligand)
+                            RFv2 = rf2.predict(ligand)
+                            RFv3 = rf3.predict(ligand)
+                            plec_score = plecscore.predict(ligand)
+                            nn_score = nn.predict([ligand])
+                            VinaScore = ligand.data
+                            
+                            # This part is for concatenating all 5 poses
+                            try:
+                                # ligand = list(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o))
+                                ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
+                                for p in [x for x in range(len(ligand)) if x != 0]:
+                                    KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[p])
+                                    Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2
+
+                                    PLEC_v2 = oddt.fingerprints.PLEC(ligand[p], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
+                                    Conc_PLEC += PLEC_v2
+
+                                    SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=1, size=4096, distance_cutoff=4.5)
+                                    SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
+                                    Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2
+                                    
+                                    SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=2, size=4096, distance_cutoff=4.5)
+                                    SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
+                                    Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2
+                                    
+                                    SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[p], protein, depth=3, size=4096, distance_cutoff=4.5)
+                                    SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
+                                    Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
+                                Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
+                                Conc_PLEC = ''.join(str(list(Conc_PLEC)))
+                                Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
+                                Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
+                                Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
+                                print("Concatenated poses succesfully! Number of poses: "+str(p))
+                                
+                            except:
+                                print("Decoys. This file might not have any poses?")
+                            
+                            SMILES,pchembl_mean,pchembl_median = '','',''
+                            print("On dataframe!")
+                            
+                            count += 1
+                            dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count],dataframe_SIFP["decoy_group"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                            dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count],dataframe_PLEC["decoy_group"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                            dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                            dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                            dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count],dataframe_SPLIF_ECFP2["decoy_group"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname,decoy_group
+                            print("Done appending to dataframe! Number appended", count, int(run_number))
+                        except:
+                            print("No pdbqt files available maybe?")
+                        
+                        
+                except:
+                    print("No second decoy files available")
+
+                if int(group_number) > 8:
+                            var_from = next_from
+                else:
+                    pass
+                
+                if int(decoy_number) >= 125:
+                    print("Going dark to preserve memory..")
+                    with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
+                        to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
+                        print(to_write)
+                        f.write(to_write)
+                    f.close()
+                    dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+                    dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+                    os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
+                else:
+                    pass     
+        except:
+            print("There is a time to run and a time to error out")
+            # ListFaultyStructures.append(filename)
+            quit("No valid structure to calculate on")
+    except:
+        print("Maybe haven't downloaded ", filename,"moving on to next structure!")
+        ListFaultyStructures.append(filename)
+        next_from = int(var_from) + 1
+        if next_from == var_to:
+            print("DONE WITH RUN")
+            sys.exit()
+            os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str("blub"))
+        file_number, count, group_number = 0,-1,0
+        with open(f'config_{next_from}_{var_to}.txt', 'w') as f:
+            to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to)
+            print(to_write)
+            f.write(to_write)
+        f.close()
+        dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
+        dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
+        dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
+        dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
+        dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
+        os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(next_from), str(var_to))
+
+print("Done with run!")  # Reached only when nothing above ended or replaced the process (os.execl / sys.exit / quit); the commented-out lines below are a disabled end-of-run CSV and faulty-structure export.
+# dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
+# dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv')
+# dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}.csv')
+# dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}.csv')
+# dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}.csv')
+# with open(f'../IFP_datasets/FaultyStructures_dense_{filename[:-5]}.txt', "w") as f:
+#     f.writelines(ListFaultyStructures)
+# f.close()
+# rdkit.SimDivFilters.rdSimDivPickers.MaxMinPicker()