Prince/funda/funda.py
Eljakim Herrewijnen 7b748b5b9e Initial
2021-08-16 22:06:55 +02:00

40 lines
1.7 KiB
Python

from storage import Storage
from selenium import webdriver
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from models.house import House
from storage import Storage
# Path of the chromedriver executable. FundaCrawler resolves it with
# os.path.abspath, so a relative value is interpreted against the current
# working directory of the process.
CHROMEDRIVER = "chromedriver"
# Scheme and host prefix of the search URL that FundaCrawler builds.
FUNDA_BASE = "https://www.funda.nl"
class FundaCrawler():
    """Crawl funda.nl "koop" search results with a headless Chrome browser.

    Constructing an instance builds the search URL and starts a Chrome
    session through chromedriver. Call :meth:`close` (or use the instance as
    a context manager) to shut the browser down again.
    """

    def __init__(self, storage: "Storage", placename="heel-nederland", minimum_price=0, maximum_price=250000, distance=30):
        """Build the search URL and start the headless browser.

        Args:
            storage: Object whose ``AddHouse`` method receives every house
                found by :meth:`crawlKoopwoningen`.
            placename: Place segment of the search URL.
            minimum_price: Lower bound of the price range in the URL.
            maximum_price: Upper bound of the price range in the URL.
            distance: Search radius. A bare number such as ``30`` is turned
                into ``+30km``; a value that already carries ``+`` and/or
                ``km`` is accepted as well.
        """
        self.placename = placename
        self.minimum_price = minimum_price
        self.maximum_price = maximum_price
        self.distance = distance
        self.storage: Storage = storage
        # Example: https://www.funda.nl/koop/gemeente-barendrecht/0-250000/+30km/
        self.url = (
            f"{FUNDA_BASE}/koop/{placename}/{minimum_price}-{maximum_price}"
            f"/{self._distance_segment(distance)}/"
        )
        self.chrome = Options()
        self.chrome.add_argument("--headless")
        self.chrome.add_argument('--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"')
        self.chrome.binary_location = '/usr/bin/google-chrome'
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
        self.driver = webdriver.Chrome(executable_path=os.path.abspath(CHROMEDRIVER), options=self.chrome)

    @staticmethod
    def _distance_segment(distance):
        """Return the radius URL segment, e.g. ``30`` -> ``"+30km"``."""
        radius = str(distance).strip().lstrip("+")
        if not radius.endswith("km"):
            radius += "km"
        return f"+{radius}"

    def crawlKoopwoningen(self) -> "list[House]":
        """Load the search page and collect every listed house.

        Each element with the CSS class ``search-result`` becomes one
        ``House`` (element id, ``None``, element text, base64 screenshot).
        The same ``House`` object is handed to ``storage.AddHouse`` and
        appended to the returned list, so the screenshot is taken only once
        per element.

        Returns:
            The houses found, or an empty list when the page title does not
            contain "Koopwoningen" (i.e. the page is not a result page).
        """
        self.driver.get(self.url)
        # TODO: add error checking
        if "Koopwoningen" not in self.driver.title:
            print(f"Unexpected page title {self.driver.title!r} for {self.url}")
            return []

        # Imported here so the block does not rely on a new file-level import.
        from selenium.webdriver.common.by import By

        found = []
        for element in self.driver.find_elements(By.CLASS_NAME, "search-result"):
            house = House(element.id, None, element.text, element.screenshot_as_base64)
            self.storage.AddHouse(house)
            found.append(house)
        return found

    def close(self):
        """Quit the browser session started in ``__init__``."""
        self.driver.quit()

    def __enter__(self):
        """Return the crawler itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser when the ``with`` block ends; never swallow errors."""
        self.close()
        return False