Initial
This commit is contained in:
parent
b62af9b631
commit
7b748b5b9e
15
funda/.vscode/launch.json
vendored
Normal file
15
funda/.vscode/launch.json
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Main",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "main.py",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
6
funda/.vscode/settings.json
vendored
Normal file
6
funda/.vscode/settings.json
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"python.linting.pycodestyleEnabled": false,
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.banditEnabled": false,
|
||||
"python.linting.pylintEnabled": true
|
||||
}
|
6
funda/LICENSE
Normal file
6
funda/LICENSE
Normal file
@ -0,0 +1,6 @@
|
||||
This is the Herreweb Family License.
|
||||
This code may be used unconditionally within the Herrewijnen Family group.
|
||||
Apart from that, this code is considered confidential because it might contain family-related material; it is not to be shared freely and, unless acquired with explicit permission, may NOT be used.
|
||||
|
||||
|
||||
|
3
funda/README.md
Normal file
3
funda/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Prince
|
||||
|
||||
Funda crawler for searching for a house.
|
BIN
funda/chromedriver
Executable file
BIN
funda/chromedriver
Executable file
Binary file not shown.
5
funda/config.py
Normal file
5
funda/config.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Application configuration constants."""

# NOTE: the former `from storage import SQLITE_PATH` was dropped. The storage
# module defines no SQLITE_PATH, so the import raised ImportError, and the
# value is defined right below anyway.

DEBUG = True  # Turns on debugging features in Flask
SQLITE_PATH = "storage.db"  # SQLite database file (same value as Storage's default path)
|
40
funda/funda.py
Normal file
40
funda/funda.py
Normal file
@ -0,0 +1,40 @@
|
||||
from storage import Storage
|
||||
from selenium import webdriver
|
||||
import os
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from models.house import House
|
||||
from storage import Storage
|
||||
|
||||
# Chromedriver executable; FundaCrawler passes it through os.path.abspath,
# so it is resolved relative to the current working directory.
CHROMEDRIVER = "chromedriver"
|
||||
|
||||
# Site root that FundaCrawler prefixes to the search path it builds.
FUNDA_BASE = "https://www.funda.nl"
|
||||
|
||||
class FundaCrawler():
    """Headless-Chrome crawler that collects Funda "koop" search results.

    Args:
        storage: Storage instance every crawled house is saved to.
        placename: Place segment of the search URL.
        minimum_price: Lower bound of the price range in the search URL.
        maximum_price: Upper bound of the price range in the search URL.
        distance: Search radius in kilometres, emitted as ``+<distance>km``.
    """

    def __init__(self, storage: Storage, placename="heel-nederland",
                 minimum_price=0, maximum_price=250000, distance=30):
        self.placename = placename
        self.minimum_price = minimum_price
        self.maximum_price = maximum_price
        self.distance = distance
        self.storage: Storage = storage
        # Target format: https://www.funda.nl/koop/gemeente-barendrecht/0-250000/+30km/
        # The radius segment needs the "km/" suffix shown above; the URL was
        # previously built as ".../+30" without it.
        self.url = (
            f"{FUNDA_BASE}/koop/{placename}/"
            f"{minimum_price}-{maximum_price}/+{distance}km/"
        )
        self.chrome = Options()
        self.chrome.add_argument("--headless")
        self.chrome.add_argument('--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"')
        self.chrome.binary_location = '/usr/bin/google-chrome'
        self.driver = webdriver.Chrome(executable_path=os.path.abspath(CHROMEDRIVER), chrome_options=self.chrome)

    def close(self):
        """Quit the browser so the Chrome/chromedriver processes do not linger."""
        self.driver.quit()

    def crawlKoopwoningen(self):
        """Load the search page, store every result and return the results.

        Returns:
            list: one House per element with class "search-result"; empty
            when the page title does not contain "Koopwoningen".
        """
        self.driver.get(self.url)
        #TODO Add error checking
        ret = []
        if "Koopwoningen" not in self.driver.title:
            # Not the expected results page; nothing to collect.
            return ret
        for element in self.driver.find_elements_by_class_name("search-result"):
            # Build the House once and reuse it: reading screenshot_as_base64
            # asks the browser for a new screenshot each time, and it used to
            # be read twice per result.
            found = House(element.id, None, element.text, element.screenshot_as_base64)
            ret.append(found)
            self.storage.AddHouse(found)
        return ret
|
BIN
funda/geckodriver
Executable file
BIN
funda/geckodriver
Executable file
Binary file not shown.
17
funda/main.py
Normal file
17
funda/main.py
Normal file
@ -0,0 +1,17 @@
|
||||
from funda import FundaCrawler
|
||||
from webserver import *
|
||||
from storage import Storage
|
||||
|
||||
def main():
    """Create the storage and the crawler, then run the web server.

    The SQLite connection is closed when the server returns or when start-up
    fails, instead of being left open until interpreter exit.
    """
    # Storage module
    st = Storage()
    try:
        # Crawler; crawling itself is not triggered yet.
        fc = FundaCrawler(st, placename="Ridderkerk")
        # TODO: run fc.crawlKoopwoningen() (currently disabled)

        StartServer(st, fc)
    finally:
        st.close()


if __name__ == "__main__":
    main()
|
7
funda/models/house.py
Normal file
7
funda/models/house.py
Normal file
@ -0,0 +1,7 @@
|
||||
class House():
    """Plain record for one house listing.

    The constructor stores its arguments unchanged as attributes of the same
    name; ``checked`` defaults to False.
    """

    def __init__(self, id, url, name, image, checked=False):
        # Listing identity and the data shown for it.
        self.id, self.name, self.image = id, name, image
        # Link to the listing (may be None) and the "checked" flag.
        self.url, self.checked = url, checked
|
BIN
funda/out.png
Normal file
BIN
funda/out.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 82 KiB |
0
funda/requirements.txt
Normal file
0
funda/requirements.txt
Normal file
BIN
funda/storage.db
Normal file
BIN
funda/storage.db
Normal file
Binary file not shown.
39
funda/storage.py
Normal file
39
funda/storage.py
Normal file
@ -0,0 +1,39 @@
|
||||
'''
|
||||
Storage module
|
||||
'''
|
||||
from models.house import House
|
||||
import sqlite3
|
||||
|
||||
|
||||
|
||||
class Storage():
    """SQLite-backed persistence for House records.

    Args:
        path: Database file to open. The ``houses`` table is created if it
            does not exist yet.
    """

    def __init__(self, path="storage.db"):
        # check_same_thread=False allows threads other than the creating one
        # to use this connection.
        self.db = sqlite3.connect(path, check_same_thread=False)
        # Kept for backward compatibility; the methods below no longer share it.
        self.cursor = self.db.cursor()
        housestable = """CREATE TABLE IF NOT EXISTS houses (
            id integer PRIMARY KEY,
            name text NOT NULL,
            image text NOT NULL,
            url text,
            checked integer
        );"""
        self.db.execute(housestable)
        self.db.commit()

    def close(self):
        """Close the underlying database connection."""
        self.db.close()

    def AddHouse(self, house: "House"):
        """Insert ``house`` as a new row; the row id is assigned by SQLite.

        Values are bound as parameters instead of being formatted into the
        SQL text, so apostrophes in scraped names cannot break or alter the
        statement. A ``None`` url is stored as NULL (it used to be stored as
        the string 'None').
        """
        # The connection context manager commits on success and rolls back
        # if the insert raises.
        with self.db:
            self.db.execute(
                "INSERT INTO houses(name, image, url, checked) VALUES (?, ?, ?, ?)",
                (house.name, house.image, house.url, int(house.checked)),
            )

    def GetHouses(self):
        """Return every stored row as a list of House objects."""
        # A fresh cursor per call instead of the shared self.cursor, since the
        # connection is opened for use from several threads.
        rows = self.db.execute(
            "SELECT id, name, image, url, checked FROM houses"
        ).fetchall()
        return [
            House(id=row_id, url=url, name=name, image=image, checked=bool(checked))
            for row_id, name, image, url, checked in rows
        ]
|
26
funda/webserver.py
Normal file
26
funda/webserver.py
Normal file
@ -0,0 +1,26 @@
|
||||
from flask import Flask
|
||||
from funda import FundaCrawler
|
||||
from storage import Storage
|
||||
import base64
|
||||
|
||||
# Shared state handed over by StartServer(). Both start as None: a bare
# annotation creates no variable, so the storage check in index() would raise
# NameError if a request were handled before StartServer() ran.
storage: "Storage | None" = None
crawler: "FundaCrawler | None" = None

app = Flask("Project Prince")
|
||||
@app.route("/")
def index():
    """Render every stored house image as an inline PNG.

    Returns:
        str: "Storage not found" when no storage has been configured,
        otherwise the concatenated ``<img>`` tags (an empty string when no
        houses are stored).
    """
    if storage is None:
        return "Storage not found"
    # house.image is embedded as-is in a base64 data: URI.
    # NOTE(review): every tag carries the same id="imgslot"; HTML ids should
    # be unique. Left unchanged here.
    return "".join(
        f'<img src="data:image/png;base64,{house.image}" alt="img_data" id="imgslot"/>'
        for house in storage.GetHouses()
    )
|
||||
|
||||
|
||||
def StartServer(st, cw):
    """Publish the storage and crawler as module globals, then run Flask.

    Args:
        st: Storage instance that the index view reads houses from.
        cw: Crawler instance kept in the module-level ``crawler`` global.
    """
    global storage, crawler
    storage, crawler = st, cw
    debug_enabled = app.config["DEBUG"]
    app.run(debug=debug_enabled)
|
Loading…
Reference in New Issue
Block a user