Prince/funda/funda.py

40 lines
1.7 KiB
Python
Raw Permalink Normal View History

2021-08-16 20:06:55 +00:00
from storage import Storage
from selenium import webdriver
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from models.house import House
from storage import Storage
# Path of the chromedriver executable. It is passed through os.path.abspath()
# when the driver is created, so a relative value is resolved against the
# current working directory.
CHROMEDRIVER = "chromedriver"
# Scheme and host that every funda search URL built by FundaCrawler starts with.
FUNDA_BASE = "https://www.funda.nl"
class FundaCrawler():
    """Crawl funda.nl "koop" search results with a headless Chrome driver.

    The search URL is assembled once in ``__init__`` from the place name,
    price range and search radius. ``crawlKoopwoningen`` loads that URL,
    turns every ``search-result`` element into a ``House`` and hands it to
    the supplied storage.

    The instance owns a Chrome process. Call ``close()`` when done, or use
    the crawler as a context manager (``with FundaCrawler(...) as crawler:``).
    """

    def __init__(self, storage: "Storage", placename="heel-nederland", minimum_price=0, maximum_price=250000, distance=30):
        """Build the search URL and start the headless Chrome driver.

        Args:
            storage: Object whose ``AddHouse(house)`` is called for every
                crawled result.
            placename: Place segment of the search URL.
            minimum_price: Lower bound of the price range segment.
            maximum_price: Upper bound of the price range segment.
            distance: Search radius. A bare number (``30``) or a value that
                already carries the unit (``"30km"``) are both accepted.
        """
        self.placename = placename
        self.minimum_price = minimum_price
        self.maximum_price = maximum_price
        self.distance = distance
        self.storage: Storage = storage
        # Target shape: https://www.funda.nl/koop/gemeente-barendrecht/0-250000/+30km/
        # The radius segment needs its "km" unit, which the bare number lacks.
        radius = self._distance_segment(distance)
        self.url = f"{FUNDA_BASE}/koop/{placename}/{minimum_price}-{maximum_price}/{radius}"
        self.chrome = Options()
        self.chrome.add_argument("--headless")
        # NOTE(review): the double quotes are part of the argument value and
        # may end up inside the user-agent string itself -- confirm.
        self.chrome.add_argument('--user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"')
        self.chrome.binary_location = '/usr/bin/google-chrome'
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
        self.driver = webdriver.Chrome(executable_path=os.path.abspath(CHROMEDRIVER), options=self.chrome)

    @staticmethod
    def _distance_segment(distance):
        """Return the radius URL segment for *distance*, e.g. ``30`` -> ``"+30km"``."""
        text = str(distance).strip().lstrip("+")
        if not text.endswith("km"):
            text += "km"
        return f"+{text}"

    def close(self):
        """Quit the Chrome driver and release the browser process."""
        self.driver.quit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def crawlKoopwoningen(self):
        """Load the search URL and collect every listed house.

        Each ``search-result`` element becomes one ``House`` built from the
        element's id, ``None``, its text and its base64 screenshot. The same
        ``House`` object is appended to the returned list and passed to
        ``storage.AddHouse``.

        Returns:
            list: The crawled houses; empty when the page title does not
            contain "Koopwoningen".
        """
        # Imported here so the block does not depend on the file's import
        # header; find_elements(By...) works on Selenium 3 and 4.
        from selenium.webdriver.common.by import By

        self.driver.get(self.url)
        #TODO Add error checking
        ret = []
        if "Koopwoningen" not in self.driver.title:
            # Not a results page: keep the diagnostic print, return no houses.
            print(self.driver)
            return ret
        for element in self.driver.find_elements(By.CLASS_NAME, "search-result"):
            # Build once: screenshot_as_base64 is a WebDriver round-trip.
            house = House(element.id, None, element.text, element.screenshot_as_base64)
            ret.append(house)
            self.storage.AddHouse(house)
        return ret