This commit is contained in:
Eljakim Herrewijnen 2021-08-16 22:06:55 +02:00
parent b62af9b631
commit 7b748b5b9e
15 changed files with 164 additions and 0 deletions

15
funda/.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Main",
"type": "python",
"request": "launch",
"program": "main.py",
"console": "integratedTerminal"
}
]
}

6
funda/.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,6 @@
{
"python.linting.pycodestyleEnabled": false,
"python.linting.enabled": true,
"python.linting.banditEnabled": false,
"python.linting.pylintEnabled": true
}

6
funda/LICENSE Normal file
View File

@ -0,0 +1,6 @@
This is the Herreweb Family License.
This code may be used unconditionally within the Herrewijnen Family group.
Apart from that, this code is considered confidential because it might contain family-related material; it is not to be shared freely and, unless acquired with explicit permission, may NOT be used.

3
funda/README.md Normal file
View File

@ -0,0 +1,3 @@
# Prince
Funda crawler for searching for a house.

BIN
funda/chromedriver Executable file

Binary file not shown.

5
funda/config.py Normal file
View File

@ -0,0 +1,5 @@
"""Application configuration constants."""

# Defined here rather than imported: the storage module exposes no
# SQLITE_PATH, so importing it from there fails with ImportError.
DEBUG = True  # Turns on debugging features in Flask
# Same file name that Storage uses as its default database path.
SQLITE_PATH = "storage.db"

40
funda/funda.py Normal file
View File

@ -0,0 +1,40 @@
from storage import Storage
from selenium import webdriver
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from models.house import House
from storage import Storage
# File name of the chromedriver binary; resolved against the current working
# directory when the crawler is constructed.
CHROMEDRIVER = "chromedriver"
FUNDA_BASE = "https://www.funda.nl"


class FundaCrawler():
    """Crawl Funda "koop" search results with a headless Chrome driver.

    The search URL is built once in the constructor from the place name,
    price range and search radius; crawlKoopwoningen() loads that URL and
    stores every search result it finds.
    """

    def __init__(self, storage: "Storage", placename="heel-nederland",
                 minimum_price=0, maximum_price=250000, distance=30):
        """Build the search URL and start a headless Chrome session.

        storage: object whose AddHouse() receives every crawled house.
        placename: place segment of the Funda URL.
        minimum_price / maximum_price: price range segment of the URL.
        distance: search radius in kilometres (a number, or a string that
            already ends in "km").
        """
        self.placename = placename
        self.minimum_price = minimum_price
        self.maximum_price = maximum_price
        self.distance = distance
        self.storage: Storage = storage
        # Example: https://www.funda.nl/koop/gemeente-barendrecht/0-250000/+30km/
        self.url = self._build_search_url(placename, minimum_price, maximum_price, distance)
        self.chrome = Options()
        self.chrome.add_argument("--headless")
        self.chrome.add_argument('--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"')
        self.chrome.binary_location = '/usr/bin/google-chrome'
        self.driver = webdriver.Chrome(executable_path=os.path.abspath(CHROMEDRIVER), chrome_options=self.chrome)

    @staticmethod
    def _build_search_url(placename, minimum_price, maximum_price, distance):
        """Return the Funda search URL for the given place, prices and radius."""
        radius = str(distance)
        # The radius segment carries a "km" unit (see the example URL in
        # __init__); accept values that already include it.
        if not radius.endswith("km"):
            radius += "km"
        return f"{FUNDA_BASE}/koop/{placename}/{minimum_price}-{maximum_price}/+{radius}/"

    def crawlKoopwoningen(self):
        """Load the search page, store every result and return them.

        Returns a list of House objects, one per element with the
        "search-result" class. The list is empty when the page title does
        not contain "Koopwoningen".
        """
        self.driver.get(self.url)
        # TODO Add error checking
        ret = []
        if "Koopwoningen" not in self.driver.title:
            # Unexpected page: emit the driver for debugging, store nothing.
            print(self.driver)
            return ret
        for element in self.driver.find_elements_by_class_name("search-result"):
            # Build the House once so the screenshot is only taken once.
            # NOTE(review): element.id looks like Selenium's own element
            # handle rather than a Funda listing id - confirm.
            house = House(element.id, None, element.text, element.screenshot_as_base64)
            ret.append(house)
            self.storage.AddHouse(house)
        return ret

BIN
funda/geckodriver Executable file

Binary file not shown.

17
funda/main.py Normal file
View File

@ -0,0 +1,17 @@
from funda import FundaCrawler
from webserver import *
from storage import Storage
def main():
    """Create the storage and crawler, then serve the web UI until it stops.

    The storage connection and the crawler's browser session are released
    when the server returns or raises.
    """
    # Storage module
    st = Storage()
    try:
        # Crawler for the Ridderkerk search; no crawl is triggered here.
        fc = FundaCrawler(st, placename="Ridderkerk")
        try:
            StartServer(st, fc)
        finally:
            # End the browser session started by the crawler's constructor.
            fc.driver.quit()
    finally:
        st.close()


if __name__ == "__main__":
    main()

7
funda/models/house.py Normal file
View File

@ -0,0 +1,7 @@
class House():
    """Plain record for one house listing.

    Attributes mirror the constructor arguments: id, url, name, image and
    checked (False unless given).
    """

    def __init__(self, id, url, name, image, checked=False):
        # Copy each argument onto the attribute of the same name.
        self.id, self.url = id, url
        self.name, self.image = name, image
        self.checked = checked

BIN
funda/out.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 KiB

0
funda/requirements.txt Normal file
View File

BIN
funda/storage.db Normal file

Binary file not shown.

39
funda/storage.py Normal file
View File

@ -0,0 +1,39 @@
'''
Storage module
'''
from models.house import House
import sqlite3
class Storage():
    """SQLite-backed store for House records.

    Opens (or creates) the database file and makes sure the ``houses``
    table exists.
    """

    def __init__(self, path="storage.db"):
        """Open the database at *path* and create the houses table if missing."""
        # check_same_thread=False: the connection is also used from threads
        # other than the one that created it.
        self.db = sqlite3.connect(path, check_same_thread=False)
        self.cursor = self.db.cursor()
        housestable = """CREATE TABLE IF NOT EXISTS houses (
            id integer PRIMARY KEY,
            name text NOT NULL,
            image text NOT NULL,
            url text,
            checked integer
        );"""
        self.db.execute(housestable)
        self.db.commit()

    def close(self):
        """Close the underlying database connection."""
        self.db.close()

    def AddHouse(self, house: "House"):
        """Insert *house* as a new row; the row id is assigned by SQLite.

        Values are bound as parameters, so names containing quotes are
        stored verbatim. A url of None is stored as NULL.
        """
        url = None if house.url is None else str(house.url)
        # The connection context manager commits on success and rolls back
        # if the insert raises.
        with self.db:
            self.db.execute(
                "INSERT INTO houses(name, image, url, checked) VALUES (?, ?, ?, ?)",
                (house.name, house.image, url, int(house.checked)),
            )

    def GetHouses(self):
        """Return every stored row as a list of House objects."""
        # Name the columns explicitly so the index mapping below does not
        # depend on the table's column order.
        rows = self.cursor.execute(
            "SELECT id, name, image, url, checked FROM houses"
        ).fetchall()
        return [
            House(id=row[0], url=row[3], name=row[1], image=row[2], checked=bool(row[4]))
            for row in rows
        ]

26
funda/webserver.py Normal file
View File

@ -0,0 +1,26 @@
from flask import Flask
from funda import FundaCrawler
from storage import Storage
import base64
# Module-level handles read by the request handlers. They are bound to None
# here (a bare annotation would leave the names undefined) and replaced by
# StartServer().
storage: "Storage | None" = None
crawler: "FundaCrawler | None" = None
app = Flask("Project Prince")


@app.route("/")
def index():
    """Render every stored house image as an inline base64 PNG <img> tag.

    Returns the text "Storage not found" when no storage has been set.
    """
    if storage is None:
        return "Storage not found"
    return "".join(
        f'<img src="data:image/png;base64,{house.image}" alt="img_data" id="imgslot"/>'
        for house in storage.GetHouses()
    )
def StartServer(st, cw):
    """Publish the storage and crawler as module globals, then run the app.

    st: storage object, assigned to the module-level ``storage``.
    cw: crawler object, assigned to the module-level ``crawler``.
    """
    global storage, crawler
    storage, crawler = st, cw
    # Debug mode follows the app's own DEBUG config entry.
    debug_enabled = app.config["DEBUG"]
    app.run(debug=debug_enabled)