2023-12-22 15:26:01 +01:00
|
|
|
import pandas as pd
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from funda_scraper.preprocess import preprocess_data
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def input_data():
    """Provide a one-row DataFrame of raw, string-valued listing fields.

    Every column holds a single dummy string value; the frame is passed as
    the ``df`` argument of ``preprocess_data`` by the tests in this module.
    """
    # One flat record; each value becomes a one-element column below.
    record = {
        "url": "https://www.funda.nl/koop/utrecht/appartement-00000000-dummy-100/",
        "price": "na",
        "address": "dummy 10",
        "descrip": "dummy",
        "listed_since": "Verkocht",
        "zip_code": "1111 AA",
        "size": "100 m²",
        "year": "2000",
        "living_area": "78 m²",
        "perceel_area": "78 m²",
        "kind_of_house": "Eengezinswoning",
        "building_type": "Bestaande bouw",
        "num_of_rooms": "4 kamers (3 slaapkamers)",
        "num_of_bathrooms": "1 badkamer en 1 apart toilet",
        "layout": "Aantal kamers 4 kamers (3 slaapkamers)",
        "energy_label": "A++++",
        "insulation": "Dubbel glas",
        "heating": "dummy",
        "ownership": "dummy",
        "exteriors": "dummy",
        "parking": "dummy",
        "neighborhood_name": "dummy",
        "date_list": "30 juni 2023",
        "date_sold": "13 juli 2023",
        "term": "13 dagen",
        "price_sold": "€ 500.000 k.k.",
        "last_ask_price": "€ 500.000 kosten koper",
        "last_ask_price_m2": "dummy",
        "city": "utrecht",
        "log_id": "dummy",
        "photo": "dummy",
    }
    columns = {name: [value] for name, value in record.items()}
    return pd.DataFrame(columns)
|
|
|
|
|
|
|
|
|
|
|
|
class TestPreprocessData:
    """Checks on the output of ``preprocess_data`` for the dummy listing."""

    def test_is_past_true(self, input_data):
        """Preprocess the fixture row with ``is_past=True`` and verify the result.

        The output is expected to be a single row with 17 columns, and the
        columns listed below must hold the given scalar values.
        """
        processed = preprocess_data(df=input_data, is_past=True)

        assert processed.shape == (1, 17)

        # Expected scalar per output column, checked in this order.
        expected_by_column = {
            "house_type": "appartement",
            "price": 500_000,
            "room": 4,
            "bedroom": 3,
            # "term_days": 13,  # NOTE: this check is currently disabled
            "energy_label": ">A+",
        }
        for column, expected in expected_by_column.items():
            assert processed[column].item() == expected
|