"""Selenium-based crawler for funda.nl "koop" search result pages."""

import logging
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # noqa: F401  (kept from the original import list)

from models.house import House
from storage import Storage

CHROMEDRIVER = "chromedriver"
FUNDA_BASE = "https://www.funda.nl"

logger = logging.getLogger(__name__)


class FundaCrawler:
    """Crawl one funda.nl "koop" search page with a headless Chrome driver.

    The instance owns a Chrome WebDriver. Call :meth:`close` when done, or
    use the crawler as a context manager, so the browser process is shut
    down::

        with FundaCrawler(storage) as crawler:
            houses = crawler.crawlKoopwoningen()
    """

    def __init__(self, storage: Storage, placename="heel-nederland",
                 minimum_price=0, maximum_price=250000, distance=30):
        """Build the search URL and start the headless Chrome driver.

        Args:
            storage: Receives every crawled house through ``AddHouse``.
            placename: Place segment of the search URL.
            minimum_price: Lower bound of the price range in the URL.
            maximum_price: Upper bound of the price range in the URL.
            distance: Search radius used in the URL. A ``km`` suffix is
                appended unless the value already ends with it.
        """
        self.placename = placename
        self.minimum_price = minimum_price
        self.maximum_price = maximum_price
        self.distance = distance
        self.storage: Storage = storage

        # Target format, as documented by the original author:
        #   https://www.funda.nl/koop/gemeente-barendrecht/0-250000/+30km/
        # The radius therefore needs the "km" unit and a trailing slash.
        radius = str(distance)
        if not radius.endswith("km"):
            radius += "km"
        self.url = (
            f"{FUNDA_BASE}/koop/{placename}/"
            f"{minimum_price}-{maximum_price}/+{radius}/"
        )

        self.chrome = Options()
        self.chrome.add_argument("--headless")
        # NOTE(review): the double quotes are part of the argument value and
        # may end up inside the user-agent string itself -- confirm.
        self.chrome.add_argument('--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"')
        self.chrome.binary_location = '/usr/bin/google-chrome'
        # `options=` replaces the deprecated `chrome_options=` keyword.
        self.driver = webdriver.Chrome(
            executable_path=os.path.abspath(CHROMEDRIVER),
            options=self.chrome,
        )

    def __enter__(self):
        """Return the crawler itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down when the ``with`` block ends."""
        self.close()

    def close(self):
        """Quit the Chrome driver and release the browser process."""
        self.driver.quit()

    def crawlKoopwoningen(self):
        """Load the search URL and collect every listed house.

        Each element with the CSS class ``search-result`` is turned into a
        ``House`` that is both stored through ``self.storage.AddHouse`` and
        included in the returned list.

        Returns:
            The list of ``House`` objects found. The list is empty when the
            page title does not contain "Koopwoningen".
        """
        self.driver.get(self.url)
        # TODO Add error checking
        found = []

        if "Koopwoningen" not in self.driver.title:
            logger.warning(
                "Unexpected page title %r for %s; no houses collected",
                self.driver.title,
                self.url,
            )
            return found

        for element in self.driver.find_elements(By.CLASS_NAME, "search-result"):
            # Build the House once: taking the screenshot is a WebDriver
            # round-trip, and the caller and the storage should see the
            # same object.
            house = House(element.id, None, element.text,
                          element.screenshot_as_base64)
            found.append(house)
            self.storage.AddHouse(house)
        return found