# Source: usse/scrape/funda-scraper/funda_scraper/config/config.yaml
# Snapshot: 2024-08-24 16:52:50 +00:00
#
# 63 lines
# 3.1 KiB
# YAML

# Header fields, keyed by header name (presumably sent as HTTP request
# headers - confirm against the consumer).
# `user-agent` is nested under `header`; at the same indent as `header` it
# would be a separate top-level key and `header` itself would be null.
header:
  user-agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36"
# Root URL of the target site (English-language path). Quoted so the value
# is unambiguously a string.
base_url: "https://www.funda.nl/en"
# Column names to keep, grouped into two lists (presumably one per kind of
# scraped dataset - confirm against the consumer).
# Both lists are nested under `keep_cols`; without the indent they would be
# top-level keys and `keep_cols` would be null.
keep_cols:
  # Columns listed for sold-listing data.
  sold_data:
    - ym_sold
    - year_sold
    - term_days
    - date_sold
  # Columns listed for for-sale listing data.
  selling_data:
    - house_id
    - city
    - house_type
    - building_type
    - price
    - price_m2
    - price_m2_total
    - room
    - bedroom
    - bathroom
    - living_area
    - perceel_area
    - energy_label
    - zip
    - address
    - year_built
    - house_age
    # Listing-date columns are currently disabled.
    # - date_list
    # - ym_list
    # - year_list
    - descrip
    - photo
# CSS selectors, keyed by the field each one is meant to locate.
# All entries are nested under `css_selector`; without the indent they would
# be top-level keys and `css_selector` would be null.
#
# Escaping note: the utility-class selectors contain `:`, `[`, `]` and `%`,
# which must reach the CSS engine escaped with ONE backslash (e.g. `lg\:mt-6`).
# In a double-quoted YAML scalar that is written `\\:`. Single quotes do not
# process backslashes, so `'\\:'` would yield two backslashes and change the
# selector; `price`, `address` and `zip_code` therefore all use double quotes.
css_selector:
  # The plain word `none` is a string here, not YAML null; quoted to make
  # that explicit.
  url: "none"
  price: "#__nuxt>section>main>div>div.relative.m-auto.flex.max-w-screen-lg.flex-col.lg\\:flex-row.xl\\:px-0>div.mt-4.px-4.lg\\:mt-6.lg\\:w-\\[70\\%\\].lg\\:pr-6>div.mb-4.border-b.border-solid.border-neutral-20.pb-4>div.mt-5.flex.flex-wrap.items-center.gap-3.lg\\:mt-6>div>div>span"
  address: "#__nuxt>section>main>div>div.relative.m-auto.flex.max-w-screen-lg.flex-col.lg\\:flex-row.xl\\:px-0>div.mt-4.px-4.lg\\:mt-6.lg\\:w-\\[70\\%\\].lg\\:pr-6>div.mb-4.border-b.border-solid.border-neutral-20.pb-4>div.relative.flex.justify-between.pt-2.lg\\:pt-4>h1>span.block.text-2xl.font-bold.md\\:text-3xl.lg\\:text-4xl"
  descrip: ".object-description-body"
  listed_since: ".fd-align-items-center:nth-child(6) span"
  zip_code: "#__nuxt>section>main>div>div.relative.m-auto.flex.max-w-screen-lg.flex-col.lg\\:flex-row.xl\\:px-0>div.mt-4.px-4.lg\\:mt-6.lg\\:w-\\[70\\%\\].lg\\:pr-6>div.mb-4.border-b.border-solid.border-neutral-20.pb-4>div.relative.flex.justify-between.pt-2.lg\\:pt-4>h1>span.text-neutral-40"
  size: ".fd-m-right-xl--bp-m .fd-text--nowrap"
  year: ".fd-align-items-center~ .fd-align-items-center .fd-m-right-xs"
  living_area: ".object-kenmerken-list:nth-child(8) .fd-align-items-center:nth-child(2) span"
  perceel_area: ".object-kenmerken-list:nth-child(8) .fd-align-items-center:nth-child(5) span"
  kind_of_house: ".object-kenmerken-list:nth-child(5) .fd-align-items-center:nth-child(2) span"
  building_type: ".object-kenmerken-list:nth-child(5) .fd-align-items-center:nth-child(4) span"
  num_of_rooms: ".object-kenmerken-list:nth-child(11) .fd-align-items-center:nth-child(2)"
  num_of_bathrooms: ".object-kenmerken-list:nth-child(11) .fd-align-items-center:nth-child(4)"
  layout: ".object-kenmerken-list:nth-child(11)"
  energy_label: ".energielabel"
  insulation: ".object-kenmerken-list:nth-child(14) .fd-align-items-center:nth-child(4)"
  heating: ".object-kenmerken-list:nth-child(14) .fd-align-items-center:nth-child(6)"
  ownership: ".object-kenmerken-list:nth-child(17) .fd-align-items-center:nth-child(4)"
  exteriors: ".object-kenmerken-list:nth-child(19)"
  parking: ".object-kenmerken-list:nth-child(24)"
  neighborhood_name: ".fd-display-inline--bp-m"
  date_list: "dd:nth-child(2)"
  date_sold: "dd:nth-child(4)"
  term: "dd:nth-child(6)"
  price_sold: ".object-header__price--historic"
  last_ask_price: ".object-kenmerken-list:nth-child(2) .fd-align-items-center:nth-child(2)"
  last_ask_price_m2: ".object-kenmerken-list__asking-price"
  photo: ".media-viewer-overview__section-list-item--photo img[data-lazy]"