diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..6b76b4f --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/extract-text.py b/extract-text.py index c1f18c0..605f5c0 100644 --- a/extract-text.py +++ b/extract-text.py @@ -16,7 +16,7 @@ def extract_text_from_pdf(pdf_path): return lines # Replace 'your_pdf_file.pdf' with the actual path to your PDF file -pdf_path = '/mnt/c/Projects/telosvertaling/telos_nt.pdf' +pdf_path = 'telos_nt.pdf' extracted_text = extract_text_from_pdf(pdf_path)#.replace('\n', '') def merge_entries(verse_list): @@ -110,10 +110,23 @@ df['Chapter'] = df['Chapter'].fillna(method='bfill') for book in df['Bookname'].unique(): df.loc[df['Bookname'] == book, 'Book'] = list(df['Bookname'].unique()).index(book) + 40 -df.to_csv('/mnt/c/Projects/telosvertaling/telos.csv', index=False) +df.to_csv('telos.csv', index=False) -# write to sql database -df.to_sql('telos', 'sqlite:///telos.db', if_exists='replace', index=False) +df_details = pd.DataFrame(columns=['Description', 'Abbreviation', 'Comments', 'Version', 'VersionDate', 'PublishDate', 'RightToLeft', 'OT', 'NT', 'Strong']) +df_details.loc[0] = ['Telos vertaling', 'TELOS', 'Telos from PDF. H. Medema Vaassen, revised from the original Voorhoeve translation from 1877.', '1', '2024-09-29', '1982-01-01', 0, 0, 1, 0] + +df['Verse'] = df['Verse'].astype(int) +df['Chapter'] = df['Chapter'].astype(int) +df['Book'] = df['Book'].astype(int) +df['Bookname'] = df['Bookname'].astype(str) +df['Scripture'] = df['Scripture'].astype(str) +# Write both dataframes to a single db file +import sqlite3 + +conn = sqlite3.connect('telos.mybible') +df[['Book', 'Chapter', 'Verse', 'Scripture']].to_sql('Scripture', conn, if_exists='replace', index=False) +df_details.to_sql('Details', conn, if_exists='replace', index=False) +conn.close() # with open('/mnt/c/Projects/telosvertaling/merged_entries.txt', 'w', encoding='utf-8') as f: # f.write(str([text for text in merged_entries])) diff --git a/merged_entries.txt b/merged_entries.txt deleted file mode 100644 index 0637a08..0000000 --- a/merged_entries.txt +++ /dev/null @@ -1 +0,0 @@ -[] \ No newline at end of file diff --git a/telos.db b/telos.db new file mode 100644 index 0000000..3a5438d Binary files /dev/null and b/telos.db differ diff --git a/telos.mybible b/telos.mybible new file mode 100644 index 0000000..2f77a07 Binary files /dev/null and b/telos.mybible differ