Cheminformatics/RP1/create_IFP_datasets_v2_dense.py

# -*- coding: utf-8 -*-
"""
Created on Wed May 26 20:27:23 2021

@author: Jonathan

NOTE: the "concatenated" (Conc_*) fingerprints are not actual concatenations!
They are just the sum of the fingerprints of all poses (my bad).
"""
import os
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import oddt
import oddt.interactions
import oddt.fingerprints
import oddt.scoring.descriptors.binana
import oddt.docking.AutodockVina
import rdkit
from oddt.scoring.functions.RFScore import rfscore
from oddt.scoring.functions.NNScore import nnscore
from oddt.scoring.functions.PLECscore import PLECscore
import pickle
import re
from rdkit import Chem
import sys
import gc
import urllib.request

def find_number(text, c):
    '''Return every run of digits in ``text`` that directly follows the literal prefix ``c``.

    :param text: text to search
    :param c: literal prefix that must immediately precede the digits
    :return: list of digit strings (not converted to int) in order of appearance;
             empty list if nothing matches
    '''
    # Escape the prefix so characters such as '.' or '(' are matched literally
    # instead of being interpreted as regular-expression metacharacters.
    return re.findall(r'%s(\d+)' % re.escape(c), text)

# Definitions created by Olivier Béquignon (Adding them to add more information to the dataset)
def get_pocket(klifs_id):
    '''Obtain the 85 residue binding pocket structure from the specified KLIFS structure ID

    The pocket is downloaded from the KLIFS web API as mol2 text and parsed with ODDT.

    :param klifs_id: KLIFS structure ID
    :return: ODDT protein structure of the binding pocket (its ``protein`` flag is set to True)
    '''
    url = f'https://klifs.net/api_v2/structure_get_pocket?structure_ID={klifs_id}'
    # Use the response as a context manager so the HTTP connection is always
    # closed, even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode()
    pocket = oddt.toolkit.readstring('mol2', data)
    pocket.protein = True
    return pocket

def get_ligand(klifs_id):
    '''Obtain the ligand structure from the specified KLIFS structure ID

    The ligand is downloaded from the KLIFS web API as mol2 text and parsed with ODDT.

    :param klifs_id: KLIFS structure ID
    :return: ODDT ligand structure, or None when the API returns an empty body
    '''
    url = f'https://klifs.net/api_v2/structure_get_ligand?structure_ID={klifs_id}'
    # Use the response as a context manager so the HTTP connection is always
    # closed, even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        data = response.read().decode()
    if not data:
        # Empty response body: nothing to parse.
        return None
    return oddt.toolkit.readstring('mol2', data)

def get_pocket_IFP(klifs_id: int=None,
                   pocket: oddt.toolkit.readfile=None,
                   ligand: oddt.toolkit.readfile=None):
    '''Obtain the interaction fingerprint of the ligand and protein pocket residues.

    :param klifs_id: KLIFS structure ID (ignored if pocket and ligand provided)
    :param pocket: ODDT pocket. If None, the KLIFS pocket is used
    :param ligand: ODDT ligand. If None, the KLIFS ligand is used

    :return: result of ``oddt.fingerprints.InteractionFingerprint(ligand, pocket)``
    :raises ValueError: if ``klifs_id`` is None while pocket and/or ligand is missing,
                        or if KLIFS returns no ligand for ``klifs_id``
    '''
    # The KLIFS ID is required as soon as *either* structure has to be downloaded,
    # not only when both are missing.
    if klifs_id is None and (pocket is None or ligand is None):
        raise ValueError('Must at least provide a KLIFS ID if any of pocket and/or ligand is/are missing')
    if ligand is None:
        ligand = get_ligand(klifs_id)
        # get_ligand returns None when the KLIFS API sends an empty body.
        if ligand is None:
            raise ValueError(f'No ligand available for KLIFS structure ID {klifs_id}')
    if pocket is None:
        pocket = get_pocket(klifs_id)
    return oddt.fingerprints.InteractionFingerprint(ligand, pocket)

def to_dense_fp(bits, size):
    '''Expand the string form of a sparse fingerprint into a dense list.

    :param bits: string such as ``'{1: 2, 3: 1}'`` (index: value pairs separated by
                 ``', '`` and wrapped in one leading and one trailing character)
    :param size: length of the dense list to return
    :return: list of ``size`` ints; positions absent from ``bits`` are 0 and
             indices >= ``size`` are dropped
    '''
    body = bits[1:-1]
    if not body.strip():
        # Empty fingerprint ('{}'): there are no pairs to parse.
        return [0] * size
    sparse = dict(tuple(map(int, pair.split(': '))) for pair in body.split(', '))
    return [sparse.get(i, 0) for i in range(size)]

def to_sparse_fp(X):
    '''Collapse the string form of a dense fingerprint into a sparse dict.

    :param X: string such as ``'[0, 2, 0, 1]'`` (ints separated by ``', '`` and
              wrapped in one leading and one trailing character)
    :return: dict mapping position -> value for every value greater than 0
    '''
    body = X[1:-1]
    if not body.strip():
        # Empty fingerprint ('[]'): there are no values to parse.
        return {}
    return {i: x for i, x in enumerate(map(int, body.split(', '))) if x > 0}

# n_in_range / n_per_step are only referenced by a commented-out loop header further down.
n_in_range = 0
n_per_step = 1

# Command line: <script> <var_from> <var_to>
# The two values are later used as the half-open row range
# range(int(var_from), int(var_to)) over the structure table.
if len(sys.argv) < 3:
    sys.exit(f'usage: {sys.argv[0]} <var_from> <var_to>')
var_from = sys.argv[1]
var_to = sys.argv[2]
next_from = int(var_from) + 1

print(var_from, var_to)
# NOTE: this compares the raw argument strings, not their integer values.
if var_from == var_to:
    print("var_from and var_to are the same, exiting!")
    # sys.exit() instead of exit(): the latter is injected by the `site` module
    # for interactive use and is not guaranteed to exist in every interpreter setup.
    sys.exit()

# Resume from a checkpoint written by a previous run, if there is one.
# Checkpoint layout (one value per line):
#   file_number, count, group_number, var_from, var_to
try:
    with open(f'config_{var_from}_{var_to}.txt', 'r') as f:
        config_file = f.readlines()
    # Parse every field before rebinding any global, so a truncated or corrupt
    # checkpoint cannot leave var_from / var_to half-updated.
    saved_file_number = int(config_file[0])
    saved_group_number = int(config_file[2])
    saved_var_from = int(config_file[3])
    saved_var_to = int(config_file[4])
except (OSError, ValueError, IndexError):
    # Missing, unreadable or malformed checkpoint: start from scratch.
    print("No config file. Count is -1")
    file_number = 0
    count = -1
    group_number = 0
else:
    file_number = saved_file_number
    # The stored count (line 2 of the file) is deliberately ignored and the
    # counter restarts at -1 -- presumably because the output DataFrames are
    # created empty again on every start; confirm before changing.
    count = -1
    group_number = saved_group_number
    var_from = saved_var_from
    var_to = saved_var_to
    print("File number and count", file_number, count)

# Read the three input tables, in this order:
#   csv_dataframe          - one row per kinase structure (filename, group, kinase_ID,
#                            structure_ID, uniprot are read from it below)
#   csv_active_compounds   - active compounds (standardised_smiles, accession, pchembl values)
#   csv_inactive_compounds - inactive compounds (same columns are read from it)
csv_dataframe, csv_active_compounds, csv_inactive_compounds = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../KLIFS_kinase_structure_data_selection_subselection_np.csv',
        '../../my_rp1_compounds_kinases/uniprot_kinase_actives/uniprot_kinase_actives.csv',
        '../../my_rp1_compounds_kinases/uniprot_kinase_inactives/uniprot_kinase_inactives.csv',
    )
]

# Columns that every output table carries after its own fingerprint columns.
_SHARED_COLUMNS = (
    "VinaScore", "RFv1", "RFv2", "RFv3", "nn_score", "plec_score",
    "SMILES", "pchembl_value_Mean", "pchembl_value_Median",
    "protein", "compound", "decoy_group",
)


def _empty_output_frame(*leading_columns):
    '''Create a row-less DataFrame with ``leading_columns`` followed by the shared columns.

    Each column is created by assigning an empty string to it, one after the other.
    '''
    frame = pd.DataFrame()
    for column in leading_columns + _SHARED_COLUMNS:
        frame[column] = ''
    return frame


# One empty output table per fingerprint type; rows are added in the main loop.
dataframe_SIFP = _empty_output_frame("Type", "KLIFS_pocket_IFP", "Conc_KLIFS_pocket_IFP")

dataframe_PLEC = _empty_output_frame("Type", "PLEC", "Conc_PLEC")

dataframe_SPLIF_ECFP2 = _empty_output_frame("Type", "SPLIF_ECFP2", "Conc_SPLIF_ECFP2")

dataframe_SPLIF_ECFP4 = _empty_output_frame("Type", "SPLIF_ECFP4", "Conc_SPLIF_ECFP4")

dataframe_SPLIF_ECFP6 = _empty_output_frame("Type", "SPLIF_ECFP6", "Conc_SPLIF_ECFP6")

# Collects notes about structures that could not be loaded normally.
ListFaultyStructures = []

# Iterate through my csv and select for each structure the folder with actives and the folder with inactives. Then calculate fingerprints.
# for i in range(len(csv_dataframe["filename"])):
# for i in range(0+n_in_range*n_per_step, n_per_step+n_in_range*n_per_step):
for i in range(int(var_from), int(var_to)):
    filename = csv_dataframe["filename"][i]
    group = csv_dataframe["group"][i]
    kinase_ID = csv_dataframe["kinase_ID"][i]
    structure_ID = csv_dataframe["structure_ID"][i]
    uniprot = csv_dataframe["uniprot"][i]
    klifs_id = csv_dataframe["structure_ID"][i]
    print(filename, group, kinase_ID, structure_ID, uniprot)
    os.system(f'mkdir ../IFP_datasets/SIFP_v2/{filename[:-5]}')
    os.system(f'mkdir ../IFP_datasets/PLEC_v2/{filename[:-5]}')
    os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}')
    os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}')
    os.system(f'mkdir ../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}')

    try:
        protein = ''
        try:
            os.system(f'obabel -ipdbqt ../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt -opdb -O sparse_{filename[:-5]}.pdb')
            protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb'))
            proteinname = f'../../my_rp1_compounds_kinases/selected_mol2structures/{group}_pdbqt/{filename[:-5]}.pdbqt'
            print("loaded in structure on 1st try")
        except:
            print("On except..")
            ListFaultyStructures.append("Unsanitized! ",filename)
            protein = next(oddt.toolkit.readfile('pdb', f'sparse_{filename[:-5]}.pdb', sanitize=False))
            print("Unsanitized! ", filename)
        print("loaded in protein succesfully!")
        protein.protein = True

        rf1 = rfscore.load(version=1)
        with open('pickles/rf1.pickle', 'wb') as f:
            pickle.dump(rf1, f)
        f.close()
        rf2 = rfscore.load(version=2)
        with open('pickles/rf2.pickle', 'wb') as f:
            pickle.dump(rf2, f)
        f.close()
        rf3 = rfscore.load(version=3)
        with open('pickles/rf3.pickle', 'wb') as f:
            pickle.dump(rf3, f)
        f.close()
        nn = nnscore.load()
        with open('pickles/nn.pickle', 'wb') as f:
            pickle.dump(nn, f)
        f.close()
        plecscore = PLECscore.load()
        with open('pickles/plecscore.pickle', 'wb') as f:
            pickle.dump(plecscore, f)
        f.close()

        rf1.set_protein(protein)
        rf2.set_protein(protein)
        rf3.set_protein(protein)
        nn.set_protein(protein)
        plecscore.set_protein(protein)

        # KLIFS retrieving pocket for SIFP
        pocket = get_pocket(klifs_id)

        try:
            ligand_location_actives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/actives/"
            ligand_location_inactives = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/inactives/"
            decoy_location_1000 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys/"
            decoy_location_1625 = "../../docking/"+group+"/"+uniprot+"/"+filename[:-5]+"/decoys1625/"
            run_number = 0
            number_of_actives = len(os.listdir(ligand_location_actives))
            number_of_inactives = len(os.listdir(ligand_location_inactives))
            grouplist = ["AGC","Atypical","CAMK","CK1","CMGC","STE","TK","TKL"]

            for j in sorted(os.listdir(ligand_location_actives))[file_number:]:
                print("Actives ", j, number_of_actives)
                print(file_number, number_of_actives)
                if file_number >= number_of_actives:
                    pass
                else:
                    try:
                        file_number += 1
                        run_number += 1
                        group_number = 0
                        ligandname = f'{ligand_location_actives}{j}'

                        # ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j))
                        os.system(f'obabel -ipdbqt {ligand_location_actives}{j} -osdf -O sparse_{filename[:-5]}.sdf')
                        ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
                        Type = "active"

                        # IFP = oddt.fingerprints.InteractionFingerprint(ligand, protein)
                        # SIFP = oddt.fingerprints.SimpleInteractionFingerprint(ligand,protein)
                        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
                        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
                        KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))

                        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                        Conc_PLEC = PLEC
                        PLEC = ''.join(str(list(PLEC)))

                        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
                        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
                        SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))

                        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
                        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
                        SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))

                        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
                        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
                        SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))

                        RFv1 = rf1.predict(ligand)
                        RFv2 = rf2.predict(ligand)
                        RFv3 = rf3.predict(ligand)
                        plec_score = plecscore.predict(ligand)
                        nn_score = nn.predict([ligand])
                        VinaScore = ligand.data

                        # This part is for concatenating all 5 poses
                        try:
                            # ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_actives+j))
                            ligand = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
                            for l in [x for x in range(len(ligand)) if x != 0]:
                                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[l])
                                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2

                                PLEC_v2 = oddt.fingerprints.PLEC(ligand[l], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                                Conc_PLEC += PLEC_v2

                                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=1, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2

                                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=2, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2

                                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[l], protein, depth=3, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
                            Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
                            Conc_PLEC = ''.join(str(list(Conc_PLEC)))
                            Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
                            Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
                            Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
                            print("Concatenated poses succesfully! Number of poses: "+str(l))
                        except:
                            print("Concat error!")
                            pass

                        #Dense to sparse for storage
                        # PLEC = PLEC.apply(to_sparse_fp)
                        # Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC)
                        # print(len(PLEC))
                        # SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096)
                        # Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096)
                        # SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096)
                        # Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096)
                        # SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096)
                        # Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096)

                        #Find and calculate SMILES
                        with open(ligand_location_actives+j,'r') as f:
                            pdbqt_file = f.read()
                        f.close()
                        number_from_file = find_number(pdbqt_file, '.smi:')
                        print(number_from_file)
                        target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi'
                        with open(target_smiles, 'r') as f:
                            smiles_file = f.readlines()
                        f.close()
                        SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","")
                        print(SMILES)

                        for q in range(len(csv_active_compounds)):
                            if csv_active_compounds["standardised_smiles"][q] == SMILES and csv_active_compounds["accession"][q] == uniprot:
                                pchembl_mean = csv_active_compounds["pchembl_value_Mean"][q]
                                pchembl_median = csv_active_compounds["pchembl_value_Median"][q]
                                break
                            else:
                                pchembl_mean = ''
                                pchembl_median = ''
                                pass

                        # Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2 ='','','','','','','',''

                        count += 1
                        dataframe_SIFP,dataframe_SIFP["KLIFS_pocket_IFP"][count],dataframe_SIFP["Conc_KLIFS_pocket_IFP"][count],dataframe_SIFP['VinaScore'][count],dataframe_SIFP['Type'][count],dataframe_SIFP["RFv1"][count],dataframe_SIFP["RFv2"][count],dataframe_SIFP["RFv3"][count],dataframe_SIFP["nn_score"][count],dataframe_SIFP["plec_score"][count],dataframe_SIFP["SMILES"][count],dataframe_SIFP["pchembl_value_Mean"][count],dataframe_SIFP["pchembl_value_Median"][count],dataframe_SIFP["protein"][count],dataframe_SIFP["compound"][count] = dataframe_SIFP.append(csv_dataframe.iloc[i],ignore_index=True),KLIFS_pocket_IFP,Conc_KLIFS_pocket_IFP,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
                        dataframe_PLEC,dataframe_PLEC["PLEC"][count],dataframe_PLEC["Conc_PLEC"][count],dataframe_PLEC['VinaScore'][count],dataframe_PLEC['Type'][count],dataframe_PLEC["RFv1"][count],dataframe_PLEC["RFv2"][count],dataframe_PLEC["RFv3"][count],dataframe_PLEC["nn_score"][count],dataframe_PLEC["plec_score"][count],dataframe_PLEC["SMILES"][count],dataframe_PLEC["pchembl_value_Mean"][count],dataframe_PLEC["pchembl_value_Median"][count],dataframe_PLEC["protein"][count],dataframe_PLEC["compound"][count] = dataframe_PLEC.append(csv_dataframe.iloc[i],ignore_index=True),PLEC,Conc_PLEC,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
                        dataframe_SPLIF_ECFP2,dataframe_SPLIF_ECFP2["SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2["Conc_SPLIF_ECFP2"][count],dataframe_SPLIF_ECFP2['VinaScore'][count],dataframe_SPLIF_ECFP2['Type'][count],dataframe_SPLIF_ECFP2["RFv1"][count],dataframe_SPLIF_ECFP2["RFv2"][count],dataframe_SPLIF_ECFP2["RFv3"][count],dataframe_SPLIF_ECFP2["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP2["SMILES"][count],dataframe_SPLIF_ECFP2["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP2["pchembl_value_Median"][count],dataframe_SPLIF_ECFP2["protein"][count],dataframe_SPLIF_ECFP2["compound"][count] = dataframe_SPLIF_ECFP2.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP2,Conc_SPLIF_ECFP2,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
                        dataframe_SPLIF_ECFP4,dataframe_SPLIF_ECFP4["SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4["Conc_SPLIF_ECFP4"][count],dataframe_SPLIF_ECFP4['VinaScore'][count],dataframe_SPLIF_ECFP4['Type'][count],dataframe_SPLIF_ECFP4["RFv1"][count],dataframe_SPLIF_ECFP4["RFv2"][count],dataframe_SPLIF_ECFP4["RFv3"][count],dataframe_SPLIF_ECFP4["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP4["SMILES"][count],dataframe_SPLIF_ECFP4["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP4["pchembl_value_Median"][count],dataframe_SPLIF_ECFP4["protein"][count],dataframe_SPLIF_ECFP4["compound"][count] = dataframe_SPLIF_ECFP4.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP4,Conc_SPLIF_ECFP4,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
                        dataframe_SPLIF_ECFP6,dataframe_SPLIF_ECFP6["SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6["Conc_SPLIF_ECFP6"][count],dataframe_SPLIF_ECFP6['VinaScore'][count],dataframe_SPLIF_ECFP6['Type'][count],dataframe_SPLIF_ECFP6["RFv1"][count],dataframe_SPLIF_ECFP6["RFv2"][count],dataframe_SPLIF_ECFP6["RFv3"][count],dataframe_SPLIF_ECFP6["nn_score"][count],dataframe_SPLIF_ECFP2["plec_score"][count],dataframe_SPLIF_ECFP6["SMILES"][count],dataframe_SPLIF_ECFP6["pchembl_value_Mean"][count],dataframe_SPLIF_ECFP6["pchembl_value_Median"][count],dataframe_SPLIF_ECFP6["protein"][count],dataframe_SPLIF_ECFP6["compound"][count] = dataframe_SPLIF_ECFP6.append(csv_dataframe.iloc[i],ignore_index=True),SPLIF_ECFP6,Conc_SPLIF_ECFP6,VinaScore['vina_affinity'],Type,RFv1,RFv2,RFv3,nn_score,plec_score,SMILES,pchembl_mean,pchembl_median,proteinname,ligandname
                        print("Done appending to dataframe! Number appended", count, int(run_number))

                        if int(run_number) > 375:
                            print("Going dark to preserve memory..")
                            with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
                                to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
                                print(to_write)
                                f.write(to_write)
                            f.close()
                            dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
                            os.execl(sys.executable, 'python', f'./create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
                        else:
                            pass
                        gc.collect()

                        # del(ligand,SMILES,RFv1,RFv2,RFv3,PLEC,SPLIF_ECFP2,SPLIF_ECFP4,SPLIF_ECFP6,Conc_PLEC,Conc_SPLIF_ECFP2,Conc_SPLIF_ECFP4,Conc_SPLIF_ECFP6,VinaScore,PLEC_v2,SPLIF_ECFP2_v2,SPLIF_ECFP4_v2,SPLIF_ECFP6_v2)

                        # Other descriptors I could use later on?
                        # protein_atoms, ligand_atoms, strict = oddt.interactions.hbonds(protein, ligand)
                        # print(protein_atoms['resname'])
                        # wut = oddt.interactions.close_contacts(protein_atoms, ligand_atoms, cutoff=4, x_column='coords', y_column='coords')
                    except:
                        print("Actives. Errors for some reason..")
            print("Going to inactives.. ")
            for k in sorted(os.listdir(ligand_location_inactives))[(file_number-number_of_actives):]:
                print("Inactives ", k, number_of_inactives)
                if int(file_number) >= (int(number_of_actives)+int(number_of_inactives)):
                    print("passing! len actives + actives is: ", str(number_of_actives), str(number_of_inactives))
                    pass
                else:
                    try:
                        file_number += 1
                        run_number += 1
                        group_number = 0
                        ligandname = f'{ligand_location_inactives}{k}'

                        ligand = ''
                        Type = "inactive"
                        # ligand = next(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k))
                        os.system(f'obabel -ipdbqt {ligand_location_inactives}{k} -osdf -O sparse_{filename[:-5]}.sdf')
                        ligand = next(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))

                        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
                        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
                        KLIFS_pocket_IFP = ''.join(str(list(KLIFS_pocket_IFP)))

                        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                        Conc_PLEC = PLEC
                        PLEC = ''.join(str(list(PLEC)))

                        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
                        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
                        SPLIF_ECFP2 = ''.join(str(list(SPLIF_ECFP2)))

                        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
                        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
                        SPLIF_ECFP4 = ''.join(str(list(SPLIF_ECFP4)))

                        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
                        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
                        SPLIF_ECFP6 = ''.join(str(list(SPLIF_ECFP6)))

                        RFv1 = rf1.predict(ligand)
                        RFv2 = rf2.predict(ligand)
                        RFv3 = rf3.predict(ligand)
                        plec_score = plecscore.predict(ligand)
                        nn_score = nn.predict([ligand])
                        VinaScore = ligand.data

                        # This part is for concatenating all 5 poses
                        try:
                            # ligand = list(oddt.toolkit.readfile('pdbqt', ligand_location_inactives+k))
                            ligand = list(oddt.toolkit.readfile('sdf',f'sparse_{filename[:-5]}.sdf'))
                            for m in [x for x in range(len(ligand)) if x != 0]:
                                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand[m])
                                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2

                                PLEC_v2 = oddt.fingerprints.PLEC(ligand[m], protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                                Conc_PLEC += PLEC_v2

                                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=1, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2

                                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=2, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2

                                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(ligand[m], protein, depth=3, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
                            Conc_KLIFS_pocket_IFP = ''.join(str(list(Conc_KLIFS_pocket_IFP)))
                            Conc_PLEC = ''.join(str(list(Conc_PLEC)))
                            Conc_SPLIF_ECFP2 = ''.join(str(list(Conc_SPLIF_ECFP2)))
                            Conc_SPLIF_ECFP4 = ''.join(str(list(Conc_SPLIF_ECFP4)))
                            Conc_SPLIF_ECFP6 = ''.join(str(list(Conc_SPLIF_ECFP6)))
                            print("Concatenated poses succesfully! Number of poses: "+str(m))
                        except:
                            print("Inactives. This file might not have 5 poses?")

                        #Find and calculate SMILES
                        with open(ligand_location_inactives+k,'r') as f:
                            pdbqt_file = f.read()
                        f.close()
                        number_from_file = find_number(pdbqt_file, '.smi:')
                        print(number_from_file)
                        target_smiles = f'../../my_rp1_compounds_kinases/uniprot_kinase_{Type}s/{group}/{uniprot}_{Type}s.smi'
                        with open(target_smiles, 'r') as f:
                            smiles_file = f.readlines()
                        f.close()
                        SMILES = str(smiles_file[int(number_from_file[0])-1]).replace("\n","")
                        print(SMILES)

                        for r in range(len(csv_inactive_compounds)):
                            if csv_inactive_compounds["standardised_smiles"][r] == SMILES and csv_inactive_compounds["accession"][r] == uniprot:
                                pchembl_mean = csv_inactive_compounds["pchembl_value_Mean"][r]
                                pchembl_median = csv_inactive_compounds["pchembl_value_Median"][r]
                                break
                            else:
                                pchembl_mean = ''
                                pchembl_median = ''
                                pass

                        # Append one row for this ligand to each of the five fingerprint tables.
                        # `count` is the row label the new values are written to.
                        count += 1

                        # Row copied from the input CSV, plus the values that are identical
                        # in all five tables. Built first so that a failure here leaves
                        # every table untouched.
                        metadata_row = csv_dataframe.iloc[i]
                        shared_row_values = {
                            'VinaScore': VinaScore['vina_affinity'],
                            'Type': Type,
                            "RFv1": RFv1,
                            "RFv2": RFv2,
                            "RFv3": RFv3,
                            "nn_score": nn_score,
                            "plec_score": plec_score,
                            "SMILES": SMILES,
                            "pchembl_value_Mean": pchembl_mean,
                            "pchembl_value_Median": pchembl_median,
                            "protein": proteinname,
                            "compound": ligandname,
                        }

                        dataframe_SIFP = dataframe_SIFP.append(metadata_row, ignore_index=True)
                        dataframe_PLEC = dataframe_PLEC.append(metadata_row, ignore_index=True)
                        dataframe_SPLIF_ECFP2 = dataframe_SPLIF_ECFP2.append(metadata_row, ignore_index=True)
                        dataframe_SPLIF_ECFP4 = dataframe_SPLIF_ECFP4.append(metadata_row, ignore_index=True)
                        dataframe_SPLIF_ECFP6 = dataframe_SPLIF_ECFP6.append(metadata_row, ignore_index=True)

                        # Every table receives its own fingerprint pair followed by the shared
                        # values. Previously the ECFP4 and ECFP6 rows wrote "plec_score" into
                        # dataframe_SPLIF_ECFP2, leaving their own column unset.
                        for fingerprint_frame, fingerprint_values in (
                            (dataframe_SIFP, {"KLIFS_pocket_IFP": KLIFS_pocket_IFP, "Conc_KLIFS_pocket_IFP": Conc_KLIFS_pocket_IFP}),
                            (dataframe_PLEC, {"PLEC": PLEC, "Conc_PLEC": Conc_PLEC}),
                            (dataframe_SPLIF_ECFP2, {"SPLIF_ECFP2": SPLIF_ECFP2, "Conc_SPLIF_ECFP2": Conc_SPLIF_ECFP2}),
                            (dataframe_SPLIF_ECFP4, {"SPLIF_ECFP4": SPLIF_ECFP4, "Conc_SPLIF_ECFP4": Conc_SPLIF_ECFP4}),
                            (dataframe_SPLIF_ECFP6, {"SPLIF_ECFP6": SPLIF_ECFP6, "Conc_SPLIF_ECFP6": Conc_SPLIF_ECFP6}),
                        ):
                            for column_name, cell_value in {**fingerprint_values, **shared_row_values}.items():
                                # Same column-then-row assignment the script uses everywhere.
                                fingerprint_frame[column_name][count] = cell_value
                        print("Done appending to dataframe! Number appended", count, int(run_number))

                        gc.collect()

                        # Once run_number exceeds 375: write the progress counters to the
                        # config file, dump all five tables to CSV and replace this process
                        # with a fresh run of the script (os.execl does not return).
                        if int(run_number) > 375:
                            print("Going dark to preserve memory..")
                            with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
                                # Five newline-separated values: file_number, count,
                                # group_number, var_from, var_to.
                                to_write = "\n".join(map(str, (file_number, count, group_number, var_from, var_to)))
                                print(to_write)
                                f.write(to_write)
                            dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
                            dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
                            os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))

                    except:
                        print("No pdbqt files available maybe?")

            # When count is anything other than -1 (presumably: at least one row was
            # appended above - TODO confirm), save progress and restart the script
            # before the decoys are handled. os.execl replaces the process, so on
            # success the code below only runs when count is still -1.
            if int(count) != -1:
                print("Going dark for the decoys..")
                with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
                    # Five newline-separated values: file_number, count,
                    # group_number, var_from, var_to.
                    to_write = "\n".join(map(str, (file_number, count, group_number, var_from, var_to)))
                    print(to_write)
                    f.write(to_write)
                dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
                dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
                dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
                dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
                dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
                os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))

            # Decoy loop: walk the decoy groups in grouplist, starting at index group_number
            decoy_number = 0
            for n in range(group_number, len(grouplist)):
                print("On decoys!")

                # Restart guard at the top of every decoy-group iteration: once 125 or
                # more decoys were handled by this process, save all tables and replace
                # the process with a fresh run of the script. Otherwise just advance
                # the group counter.
                if int(decoy_number) >= 125:
                    print("Going dark to preserve memory..")
                    if int(group_number) > 8:
                        next_from = int(var_from) + 1
                        # int() keeps the comparison numeric even if var_to is held as a string.
                        if next_from == int(var_to):
                            print("DONE WITH RUN")
                            # NOTE(review): the SystemExit raised here is intercepted by the
                            # bare `except:` clauses of the enclosing try blocks further down.
                            sys.exit()
                        # NOTE(review): the original had a counter reset
                        # (file_number, count, group_number = 0,-1,0) and a write of
                        # config_{next_from}_{var_to}.txt placed directly after sys.exit(),
                        # where it could never execute. It was removed as dead code; confirm
                        # whether that reset was meant to run here before the restart.
                    else:
                        with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
                            # Five newline-separated values: file_number, count,
                            # group_number, var_from, var_to.
                            to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(var_from)+"\n"+str(var_to)
                            print(to_write)
                            f.write(to_write)
                    # Same targets as every other checkpoint in this script. Previously the
                    # SIFP table was written to the PLEC_v2 directory under the PLEC file
                    # name, and the PLEC table lacked the _{file_number} suffix.
                    dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
                    # os.execl replaces the current process and does not return.
                    os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
                else:
                    group_number += 1

                # First decoy set (decoy_location_1000): for every docked decoy file of
                # this group compute the interaction fingerprints and scores, then append
                # one row per decoy to each of the five fingerprint tables.
                for o in sorted(os.listdir(decoy_location_1000+grouplist[n])):
                    print("Decoys. Decoynumber - group_number", o, group_number)
                    try:
                        file_number += 1
                        run_number += 1
                        decoy_number += 1
                        ligandname = f'{decoy_location_1000}{grouplist[n]}/{o}'
                        decoy_group = str(grouplist[n])

                        ligand = ''
                        Type = "decoy"
                        # Convert the pdbqt file to SDF with the obabel command line tool,
                        # then read the first molecule of the converted file.
                        # ligand = next(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o))
                        os.system(f'obabel -ipdbqt {decoy_location_1000}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf')
                        ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))

                        # Each fingerprint is kept twice: the raw object under Conc_* (the
                        # remaining poses are added onto it below) and its list-formatted
                        # string under the plain name.
                        KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
                        Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
                        KLIFS_pocket_IFP = str(list(KLIFS_pocket_IFP))

                        PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                        Conc_PLEC = PLEC
                        PLEC = str(list(PLEC))

                        SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
                        Conc_SPLIF_ECFP2 = SPLIF_ECFP2
                        SPLIF_ECFP2 = str(list(SPLIF_ECFP2))

                        SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
                        Conc_SPLIF_ECFP4 = SPLIF_ECFP4
                        SPLIF_ECFP4 = str(list(SPLIF_ECFP4))

                        SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
                        SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
                        Conc_SPLIF_ECFP6 = SPLIF_ECFP6
                        SPLIF_ECFP6 = str(list(SPLIF_ECFP6))

                        # Scoring-function predictions and docking data of the first pose.
                        RFv1 = rf1.predict(ligand)
                        RFv2 = rf2.predict(ligand)
                        RFv3 = rf3.predict(ligand)
                        plec_score = plecscore.predict(ligand)
                        nn_score = nn.predict([ligand])
                        VinaScore = ligand.data

                        # This part is for concatenating all 5 poses: the fingerprints of
                        # every pose after the first are added onto the Conc_* objects.
                        try:
                            # ligand = list(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o))
                            poses = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
                            for extra_pose in poses[1:]:
                                KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=extra_pose)
                                Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2

                                PLEC_v2 = oddt.fingerprints.PLEC(extra_pose, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                                Conc_PLEC += PLEC_v2

                                SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(extra_pose, protein, depth=1, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2

                                SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(extra_pose, protein, depth=2, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2

                                SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(extra_pose, protein, depth=3, size=4096, distance_cutoff=4.5)
                                SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
                                Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
                            Conc_KLIFS_pocket_IFP = str(list(Conc_KLIFS_pocket_IFP))
                            Conc_PLEC = str(list(Conc_PLEC))
                            Conc_SPLIF_ECFP2 = str(list(Conc_SPLIF_ECFP2))
                            Conc_SPLIF_ECFP4 = str(list(Conc_SPLIF_ECFP4))
                            Conc_SPLIF_ECFP6 = str(list(Conc_SPLIF_ECFP6))
                            # Reports the index of the last pose, as before, but no longer
                            # depends on a loop variable that is undefined (or stale) when
                            # the file holds a single pose.
                            print("Concatenated poses succesfully! Number of poses: "+str(len(poses) - 1))

                        except Exception as exc:
                            # NOTE(review): after a failure part-way through, the Conc_*
                            # values are left un-stringified, exactly as before this change.
                            print("Decoys. This file might not have any poses?", exc)

                        #Dense to sparse for storage
                        # PLEC = oddt.fingerprints.dense_to_sparse(PLEC, size=16384)
                        # Conc_PLEC = oddt.fingerprints.dense_to_sparse(Conc_PLEC, size=16384)
                        # SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP2, size=4096)
                        # Conc_SPLIF_ECFP2 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP2, size=4096)
                        # SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP4, size=4096)
                        # Conc_SPLIF_ECFP4 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP4, size=4096)
                        # SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(SPLIF_ECFP6, size=4096)
                        # Conc_SPLIF_ECFP6 = oddt.fingerprints.dense_to_sparse(Conc_SPLIF_ECFP6, size=4096)

                        # This doesn't (yet) work for the decoys!
                        # with open(decoy_location_1000+grouplist[n]+j,'r') as f:
                        #     pdbqt_file = f.read()
                        # f.close()
                        # number_from_file = find_number(pdbqt_file, '.smi:')
                        # target_smiles = f'../my_rp1_compounds_kinases/{Type}s/1000/{group}_125.smi'
                        # with open(target_smiles, 'r') as f:
                        #     smiles_file = f.readlines()
                        # f.close()
                        # SMILES = str(smiles_file[int(number_from_file[0])]).replace("\n","")
                        SMILES,pchembl_mean,pchembl_median = '','',''
                        print("On dataframe!")

                        # Append one row for this decoy to each table; `count` is the row
                        # label the new values are written to.
                        count += 1
                        metadata_row = csv_dataframe.iloc[i]
                        shared_row_values = {
                            'VinaScore': VinaScore['vina_affinity'],
                            'Type': Type,
                            "RFv1": RFv1,
                            "RFv2": RFv2,
                            "RFv3": RFv3,
                            "nn_score": nn_score,
                            "plec_score": plec_score,
                            "SMILES": SMILES,
                            "pchembl_value_Mean": pchembl_mean,
                            "pchembl_value_Median": pchembl_median,
                            "protein": proteinname,
                            "compound": ligandname,
                            "decoy_group": decoy_group,
                        }

                        dataframe_SIFP = dataframe_SIFP.append(metadata_row, ignore_index=True)
                        dataframe_PLEC = dataframe_PLEC.append(metadata_row, ignore_index=True)
                        dataframe_SPLIF_ECFP2 = dataframe_SPLIF_ECFP2.append(metadata_row, ignore_index=True)
                        dataframe_SPLIF_ECFP4 = dataframe_SPLIF_ECFP4.append(metadata_row, ignore_index=True)
                        dataframe_SPLIF_ECFP6 = dataframe_SPLIF_ECFP6.append(metadata_row, ignore_index=True)

                        # Every table receives its own fingerprint pair followed by the shared
                        # values. Previously the ECFP4 and ECFP6 rows wrote "plec_score" and
                        # "decoy_group" into dataframe_SPLIF_ECFP2 instead of their own table.
                        for fingerprint_frame, fingerprint_values in (
                            (dataframe_SIFP, {"KLIFS_pocket_IFP": KLIFS_pocket_IFP, "Conc_KLIFS_pocket_IFP": Conc_KLIFS_pocket_IFP}),
                            (dataframe_PLEC, {"PLEC": PLEC, "Conc_PLEC": Conc_PLEC}),
                            (dataframe_SPLIF_ECFP2, {"SPLIF_ECFP2": SPLIF_ECFP2, "Conc_SPLIF_ECFP2": Conc_SPLIF_ECFP2}),
                            (dataframe_SPLIF_ECFP4, {"SPLIF_ECFP4": SPLIF_ECFP4, "Conc_SPLIF_ECFP4": Conc_SPLIF_ECFP4}),
                            (dataframe_SPLIF_ECFP6, {"SPLIF_ECFP6": SPLIF_ECFP6, "Conc_SPLIF_ECFP6": Conc_SPLIF_ECFP6}),
                        ):
                            for column_name, cell_value in {**fingerprint_values, **shared_row_values}.items():
                                fingerprint_frame[column_name][count] = cell_value
                        print("Done appending to dataframe! Number appended", count, int(run_number))

                        gc.collect()
                    except Exception as exc:
                        # Best effort: skip this decoy, but show what actually went wrong.
                        print("No pdbqt files available maybe?", exc)

                # Decoys of 1625, IF available: same processing as the first decoy set,
                # but read from decoy_location_1625 and labelled "decoy_v2". The outer try
                # covers the directory listing, so a missing directory only prints a note.
                try:
                    for o in sorted(os.listdir(decoy_location_1625+grouplist[n])):
                        print("Decoys. Decoynumber - group_number", o, group_number)
                        try:
                            # The progress counters are deliberately left untouched for this set.
                            # file_number += 1
                            # run_number += 1
                            # decoy_number += 1
                            ligandname = f'{decoy_location_1625}{grouplist[n]}/{o}'
                            decoy_group = str(grouplist[n])

                            ligand = ''
                            Type = "decoy_v2"
                            # Convert the pdbqt file to SDF with the obabel command line tool,
                            # then read the first molecule of the converted file.
                            os.system(f'obabel -ipdbqt {decoy_location_1625}{grouplist[n]}/{o} -osdf -O sparse_{filename[:-5]}.sdf')
                            ligand = next(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))

                            # Each fingerprint is kept twice: the raw object under Conc_* (the
                            # remaining poses are added onto it below) and its list-formatted
                            # string under the plain name.
                            KLIFS_pocket_IFP = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=ligand)
                            Conc_KLIFS_pocket_IFP = KLIFS_pocket_IFP
                            KLIFS_pocket_IFP = str(list(KLIFS_pocket_IFP))

                            PLEC = oddt.fingerprints.PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                            Conc_PLEC = PLEC
                            PLEC = str(list(PLEC))

                            SPLIF_ECFP2 = oddt.fingerprints.SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5)
                            SPLIF_ECFP2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2['hash'], size=4096)
                            Conc_SPLIF_ECFP2 = SPLIF_ECFP2
                            SPLIF_ECFP2 = str(list(SPLIF_ECFP2))

                            SPLIF_ECFP4 = oddt.fingerprints.SPLIF(ligand, protein, depth=2, size=4096, distance_cutoff=4.5)
                            SPLIF_ECFP4 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4['hash'], size=4096)
                            Conc_SPLIF_ECFP4 = SPLIF_ECFP4
                            SPLIF_ECFP4 = str(list(SPLIF_ECFP4))

                            SPLIF_ECFP6 = oddt.fingerprints.SPLIF(ligand, protein, depth=3, size=4096, distance_cutoff=4.5)
                            SPLIF_ECFP6 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6['hash'], size=4096)
                            Conc_SPLIF_ECFP6 = SPLIF_ECFP6
                            SPLIF_ECFP6 = str(list(SPLIF_ECFP6))

                            # Scoring-function predictions and docking data of the first pose.
                            RFv1 = rf1.predict(ligand)
                            RFv2 = rf2.predict(ligand)
                            RFv3 = rf3.predict(ligand)
                            plec_score = plecscore.predict(ligand)
                            nn_score = nn.predict([ligand])
                            VinaScore = ligand.data

                            # This part is for concatenating all 5 poses: the fingerprints of
                            # every pose after the first are added onto the Conc_* objects.
                            try:
                                # ligand = list(oddt.toolkit.readfile('pdbqt', decoy_location_1000+grouplist[n]+"/"+o))
                                poses = list(oddt.toolkit.readfile('sdf', f'sparse_{filename[:-5]}.sdf'))
                                for extra_pose in poses[1:]:
                                    KLIFS_pocket_IFP_v2 = get_pocket_IFP(klifs_id=klifs_id, pocket=pocket, ligand=extra_pose)
                                    Conc_KLIFS_pocket_IFP += KLIFS_pocket_IFP_v2

                                    PLEC_v2 = oddt.fingerprints.PLEC(extra_pose, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5, size=16384, count_bits=True, sparse=False, ignore_hoh=True)
                                    Conc_PLEC += PLEC_v2

                                    SPLIF_ECFP2_v2 = oddt.fingerprints.SPLIF(extra_pose, protein, depth=1, size=4096, distance_cutoff=4.5)
                                    SPLIF_ECFP2_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP2_v2['hash'], size=4096)
                                    Conc_SPLIF_ECFP2 += SPLIF_ECFP2_v2

                                    SPLIF_ECFP4_v2 = oddt.fingerprints.SPLIF(extra_pose, protein, depth=2, size=4096, distance_cutoff=4.5)
                                    SPLIF_ECFP4_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP4_v2['hash'], size=4096)
                                    Conc_SPLIF_ECFP4 += SPLIF_ECFP4_v2

                                    SPLIF_ECFP6_v2 = oddt.fingerprints.SPLIF(extra_pose, protein, depth=3, size=4096, distance_cutoff=4.5)
                                    SPLIF_ECFP6_v2 = oddt.fingerprints.sparse_to_dense(SPLIF_ECFP6_v2['hash'], size=4096)
                                    Conc_SPLIF_ECFP6 += SPLIF_ECFP6_v2
                                Conc_KLIFS_pocket_IFP = str(list(Conc_KLIFS_pocket_IFP))
                                Conc_PLEC = str(list(Conc_PLEC))
                                Conc_SPLIF_ECFP2 = str(list(Conc_SPLIF_ECFP2))
                                Conc_SPLIF_ECFP4 = str(list(Conc_SPLIF_ECFP4))
                                Conc_SPLIF_ECFP6 = str(list(Conc_SPLIF_ECFP6))
                                # Reports the index of the last pose, as before, but no longer
                                # depends on a loop variable that is undefined (or stale) when
                                # the file holds a single pose.
                                print("Concatenated poses succesfully! Number of poses: "+str(len(poses) - 1))

                            except Exception as exc:
                                # NOTE(review): after a failure part-way through, the Conc_*
                                # values are left un-stringified, exactly as before this change.
                                print("Decoys. This file might not have any poses?", exc)

                            SMILES,pchembl_mean,pchembl_median = '','',''
                            print("On dataframe!")

                            # Append one row for this decoy to each table; `count` is the row
                            # label the new values are written to.
                            count += 1
                            metadata_row = csv_dataframe.iloc[i]
                            shared_row_values = {
                                'VinaScore': VinaScore['vina_affinity'],
                                'Type': Type,
                                "RFv1": RFv1,
                                "RFv2": RFv2,
                                "RFv3": RFv3,
                                "nn_score": nn_score,
                                "plec_score": plec_score,
                                "SMILES": SMILES,
                                "pchembl_value_Mean": pchembl_mean,
                                "pchembl_value_Median": pchembl_median,
                                "protein": proteinname,
                                "compound": ligandname,
                                "decoy_group": decoy_group,
                            }

                            dataframe_SIFP = dataframe_SIFP.append(metadata_row, ignore_index=True)
                            dataframe_PLEC = dataframe_PLEC.append(metadata_row, ignore_index=True)
                            dataframe_SPLIF_ECFP2 = dataframe_SPLIF_ECFP2.append(metadata_row, ignore_index=True)
                            dataframe_SPLIF_ECFP4 = dataframe_SPLIF_ECFP4.append(metadata_row, ignore_index=True)
                            dataframe_SPLIF_ECFP6 = dataframe_SPLIF_ECFP6.append(metadata_row, ignore_index=True)

                            # Every table receives its own fingerprint pair followed by the shared
                            # values. Previously the ECFP4 and ECFP6 rows wrote "plec_score" and
                            # "decoy_group" into dataframe_SPLIF_ECFP2 instead of their own table.
                            for fingerprint_frame, fingerprint_values in (
                                (dataframe_SIFP, {"KLIFS_pocket_IFP": KLIFS_pocket_IFP, "Conc_KLIFS_pocket_IFP": Conc_KLIFS_pocket_IFP}),
                                (dataframe_PLEC, {"PLEC": PLEC, "Conc_PLEC": Conc_PLEC}),
                                (dataframe_SPLIF_ECFP2, {"SPLIF_ECFP2": SPLIF_ECFP2, "Conc_SPLIF_ECFP2": Conc_SPLIF_ECFP2}),
                                (dataframe_SPLIF_ECFP4, {"SPLIF_ECFP4": SPLIF_ECFP4, "Conc_SPLIF_ECFP4": Conc_SPLIF_ECFP4}),
                                (dataframe_SPLIF_ECFP6, {"SPLIF_ECFP6": SPLIF_ECFP6, "Conc_SPLIF_ECFP6": Conc_SPLIF_ECFP6}),
                            ):
                                for column_name, cell_value in {**fingerprint_values, **shared_row_values}.items():
                                    fingerprint_frame[column_name][count] = cell_value
                            print("Done appending to dataframe! Number appended", count, int(run_number))
                        except Exception as exc:
                            # Best effort: skip this decoy, but show what actually went wrong.
                            print("No pdbqt files available maybe?", exc)


                except Exception as exc:
                    print("No second decoy files available", exc)

                # With group_number above 8, continue from next_from: the checkpoint
                # below then writes its config file and restarts under that value.
                # NOTE(review): next_from is not assigned on the normal path through this
                # loop - presumably it is set earlier in the script; confirm.
                if int(group_number) > 8:
                    var_from = next_from

                # After 125 or more decoys in this process: store the progress counters,
                # dump all five tables and replace the process with a fresh run of the
                # script (os.execl does not return).
                if int(decoy_number) >= 125:
                    print("Going dark to preserve memory..")
                    with open(f'config_{var_from}_{var_to}.txt', 'w') as f:
                        # Five newline-separated values: file_number, count,
                        # group_number, var_from, var_to.
                        to_write = "\n".join(map(str, (file_number, count, group_number, var_from, var_to)))
                        print(to_write)
                        f.write(to_write)
                    dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
                    dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
                    os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(var_from), str(var_to))
        except:
            print("There is a time to run and a time to error out")
            # ListFaultyStructures.append(filename)
            quit("No valid structure to calculate on")
    except:
        # Fallback for a structure that could not be processed: record it, move
        # on to the next index, checkpoint, and restart the script.
        # NOTE(review): the enclosing handler is a bare `except:`, so it also
        # catches the SystemExit raised by quit() in the inner handler -- confirm
        # that this is intended.
        print("Maybe haven't downloaded ", filename,"moving on to next structure!")
        ListFaultyStructures.append(filename)
        next_from = int(var_from) + 1
        # var_to is passed between runs as text (str(var_to) goes into argv and
        # the config file), so compare textual forms; an int-vs-str `==` would
        # never be true and the run would never report completion here.
        if str(next_from) == str(var_to).strip():
            print("DONE WITH RUN")
            sys.exit()
        # Reset the per-structure progress counters for the next structure.
        file_number, count, group_number = 0,-1,0
        # Checkpoint file: one value per line, in the order
        # file_number, count, group_number, next_from, var_to.
        # The `with` block closes the file; no explicit close() is needed.
        with open(f'config_{next_from}_{var_to}.txt', 'w') as f:
            to_write = str(file_number)+"\n"+str(count)+"\n"+str(group_number)+"\n"+str(next_from)+"\n"+str(var_to)
            print(to_write)
            f.write(to_write)
        dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}_{file_number}.csv')
        dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}_{file_number}.csv')
        # os.execl replaces the process immediately without flushing buffered
        # output, so flush first or the messages above can be lost.
        sys.stdout.flush()
        os.execl(sys.executable, 'python', './create_IFP_datasets_v2_dense.py', str(next_from), str(var_to))

# Final message, printed only if execution reaches the end of the script
# (i.e. the process was not replaced via os.execl and did not exit earlier).
print("Done with run!")
# Disabled code below: a one-off export of the five fingerprint frames and of
# the faulty-structure list, plus a stray MaxMinPicker call. It appears to have
# been superseded by the per-checkpoint to_csv calls; kept for reference.
# dataframe_SIFP.to_csv(f'../IFP_datasets/SIFP_v2/{filename[:-5]}/dataframe_SIFP_dense_{filename[:-5]}_{file_number}.csv')
# dataframe_PLEC.to_csv(f'../IFP_datasets/PLEC_v2/{filename[:-5]}/dataframe_PLEC_dense_{filename[:-5]}.csv')
# dataframe_SPLIF_ECFP2.to_csv(f'../IFP_datasets/SPLIF_ECFP2_v2/{filename[:-5]}/dataframe_SPLIF_ECFP2_dense_{filename[:-5]}.csv')
# dataframe_SPLIF_ECFP4.to_csv(f'../IFP_datasets/SPLIF_ECFP4_v2/{filename[:-5]}/dataframe_SPLIF_ECFP4_dense_{filename[:-5]}.csv')
# dataframe_SPLIF_ECFP6.to_csv(f'../IFP_datasets/SPLIF_ECFP6_v2/{filename[:-5]}/dataframe_SPLIF_ECFP6_dense_{filename[:-5]}.csv')
# with open(f'../IFP_datasets/FaultyStructures_dense_{filename[:-5]}.txt', "w") as f:
#     f.writelines(ListFaultyStructures)
# f.close()
# rdkit.SimDivFilters.rdSimDivPickers.MaxMinPicker()