# usse/scrape/funda-scraper/funda_scraper/config/config.yaml
#
# 63 lines
# 2.3 KiB
# YAML
# Raw Normal View History
#
# 2023-12-22 14:26:01 +00:00
# Header fields, keyed by header name.
# NOTE(review): presumably sent with the scraper's HTTP requests - confirm
# against the code that loads this file.
header:
  user-agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36"
# Root URL of the target site (quoted for consistency with the other strings).
base_url: "https://www.funda.nl/en"
# Column names to keep, grouped into two lists.
# NOTE(review): how the two groups are combined or applied is decided by the
# consuming code, which is not visible here - confirm before reordering.
keep_cols:
  # Columns listed for sold listings.
  sold_data:
    - ym_sold
    - year_sold
    - term_days
    - date_sold
  # Columns listed for listings on sale.
  selling_data:
    - house_id
    - city
    - house_type
    - building_type
    - price
    - price_m2
    - price_m2_total
    - room
    - bedroom
    - bathroom
    - living_area
    - perceel_area
    - energy_label
    - zip
    - address
    - year_built
    - house_age
    # The three listing-date columns below are disabled.
    # - date_list
    # - ym_list
    # - year_list
    - descrip
    - photo
# CSS selectors, keyed by the field each one extracts.
# Many of these are positional (:nth-child), so they depend on the exact
# element order of the page markup and will silently match the wrong node
# (or nothing) if the page layout changes.
css_selector:
  # The literal string "none", not a YAML null (plain `none` is a string).
  # NOTE(review): presumably a sentinel meaning "no selector for url" - confirm.
  url: "none"
  price: ".object-header__price"
  address: ".object-header__title"
  descrip: ".object-description-body"
  listed_since: ".fd-align-items-center:nth-child(6) span"
  zip_code: ".object-header__subtitle"
  size: ".fd-m-right-xl--bp-m .fd-text--nowrap"
  year: ".fd-align-items-center~ .fd-align-items-center .fd-m-right-xs"
  living_area: ".object-kenmerken-list:nth-child(8) .fd-align-items-center:nth-child(2) span"
  perceel_area: ".object-kenmerken-list:nth-child(8) .fd-align-items-center:nth-child(5) span"
  kind_of_house: ".object-kenmerken-list:nth-child(5) .fd-align-items-center:nth-child(2) span"
  building_type: ".object-kenmerken-list:nth-child(5) .fd-align-items-center:nth-child(4) span"
  num_of_rooms: ".object-kenmerken-list:nth-child(11) .fd-align-items-center:nth-child(2)"
  num_of_bathrooms: ".object-kenmerken-list:nth-child(11) .fd-align-items-center:nth-child(4)"
  layout: ".object-kenmerken-list:nth-child(11)"
  energy_label: ".energielabel"
  insulation: ".object-kenmerken-list:nth-child(14) .fd-align-items-center:nth-child(4)"
  heating: ".object-kenmerken-list:nth-child(14) .fd-align-items-center:nth-child(6)"
  ownership: ".object-kenmerken-list:nth-child(17) .fd-align-items-center:nth-child(4)"
  exteriors: ".object-kenmerken-list:nth-child(19)"
  parking: ".object-kenmerken-list:nth-child(24)"
  neighborhood_name: ".fd-display-inline--bp-m"
  date_list: "dd:nth-child(2)"
  date_sold: "dd:nth-child(4)"
  term: "dd:nth-child(6)"
  price_sold: ".object-header__price--historic"
  last_ask_price: ".object-kenmerken-list:nth-child(2) .fd-align-items-center:nth-child(2)"
  last_ask_price_m2: ".object-kenmerken-list__asking-price"
  photo: ".media-viewer-overview__section-list-item--photo img[data-lazy]"