40 lines
1.7 KiB
Python
40 lines
1.7 KiB
Python
from storage import Storage
|
|
from selenium import webdriver
|
|
import os
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.keys import Keys
|
|
from selenium.webdriver.chrome.options import Options
|
|
from models.house import House
|
|
from storage import Storage
|
|
|
|
# Path of the chromedriver executable. FundaCrawler passes it through
# os.path.abspath(), so a relative value is resolved against the current
# working directory at the time the crawler is constructed.
CHROMEDRIVER = "chromedriver"
|
|
|
|
# Scheme and host prefix (no trailing slash) that FundaCrawler prepends to
# the "/koop/..." search path when building its search URL.
FUNDA_BASE = "https://www.funda.nl"
|
|
|
|
class FundaCrawler:
    """Crawl Funda "koop" search results with a headless Chrome browser.

    Constructing an instance starts a Chrome session immediately. Call
    close() when done, or use the instance as a context manager, so the
    browser and chromedriver processes are shut down.
    """

    def __init__(self, storage: Storage, placename="heel-nederland",
                 minimum_price=0, maximum_price=250000, distance=30):
        """Store the search parameters, build the search URL, start Chrome.

        Args:
            storage: Object whose AddHouse() is called with every House
                found by crawlKoopwoningen().
            placename: Location segment of the search URL.
            minimum_price: Lower bound of the price range segment.
            maximum_price: Upper bound of the price range segment.
            distance: Search radius. A bare number is taken as kilometres;
                a value that already ends in "km" is used unchanged.
        """
        self.placename = placename
        self.minimum_price = minimum_price
        self.maximum_price = maximum_price
        self.distance = distance
        self.storage: Storage = storage

        # Example of the target URL shape:
        # https://www.funda.nl/koop/gemeente-barendrecht/0-250000/+30km/
        # The original code emitted ".../+30" without the "km/" suffix shown
        # in that example, so the radius segment is normalised here.
        radius = str(distance)
        if not radius.endswith("km"):
            radius += "km"
        self.url = (
            f"{FUNDA_BASE}/koop/{placename}/"
            f"{minimum_price}-{maximum_price}/+{radius}/"
        )

        self.chrome = Options()
        self.chrome.add_argument("--headless")
        self.chrome.add_argument('--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"')
        self.chrome.binary_location = '/usr/bin/google-chrome'

        # `options=` replaces the deprecated `chrome_options=` keyword.
        # NOTE(review): `executable_path` is itself deprecated in Selenium 4
        # in favour of a Service object; it is kept so that Selenium 3
        # installs keep working -- confirm the pinned Selenium version.
        self.driver = webdriver.Chrome(
            executable_path=os.path.abspath(CHROMEDRIVER),
            options=self.chrome,
        )

    def close(self):
        """Quit the browser session started in __init__."""
        self.driver.quit()

    def __enter__(self):
        """Return self so the crawler can be used in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser when the `with` block ends."""
        self.close()

    def crawlKoopwoningen(self):
        """Load the search URL and collect every result on the page.

        Each element with CSS class "search-result" becomes one House,
        built from the element's id, None, its text and a base64 screenshot
        (in that positional order). Every House is handed to
        storage.AddHouse() and also collected in the returned list.

        Returns:
            A list of House objects. The list is empty when the page title
            does not contain "Koopwoningen".
        """
        # Local import: By is not among the module's existing imports.
        from selenium.webdriver.common.by import By

        self.driver.get(self.url)
        # TODO: Add error checking
        ret = []

        if "Koopwoningen" not in self.driver.title:
            # Not the expected results page; keep the original debug output
            # and return an empty list so callers can always iterate.
            print(self.driver)
            return ret

        for element in self.driver.find_elements(By.CLASS_NAME, "search-result"):
            # Build the House once: screenshot_as_base64 is a WebDriver
            # round-trip, and the original evaluated it twice per element.
            # NOTE(review): element.id is Selenium's element reference, not
            # necessarily a stable Funda listing id -- confirm with House.
            found = House(element.id, None, element.text,
                          element.screenshot_as_base64)
            ret.append(found)
            self.storage.AddHouse(found)

        return ret