83 lines
2.7 KiB
Python
83 lines
2.7 KiB
Python
import geopy
|
|
from funda_scraper import FundaScraper
|
|
import datetime, os, pickle, json, tqdm
|
|
import osrm
|
|
|
|
# Endpoints of the OSRM routing service and the Nominatim geocoder.
# NOMINATIM_HOST is a bare domain; the scheme is passed separately below.
OSRM_HOST = "http://www.herreweb.nl:5998"
NOMINATIM_HOST = "geocode.herreweb.nl"

# Module-level clients, created once at import time:
# osrm_c is used by get_distances(), nomi_c by generate_json().
osrm_c = osrm.Client(host=OSRM_HOST)
nomi_c = geopy.Nominatim(
    user_agent="Project Usse",
    scheme="https",
    domain=NOMINATIM_HOST,
)
|
|
|
|
# Fixed origins: get_distances() requests one route per entry, pairing each
# of these points with the destination it is given.
# NOTE(review): the pairs look like (longitude, latitude), matching the
# [longitude, latitude] destination built in generate_json() -- confirm.
ORIGIN_LOCATIONS = dict(
    nfi_location=(4.3585175985355225, 52.044867266861466),
    hoogstraat_location=(5.147180442716177, 52.08725689123654),
    bakkersdijk_location=(4.482033956202426, 51.85802695253161),
    korhoen_location=(5.732514040876346, 52.5219455005375),
    harde_location=(5.870995170999243, 52.41650138296019),
)

# Module-level accumulator: generate_json() appends one dict per geocoded
# listing and serialises the whole list to out.json.
out = list()
|
|
|
|
def get_funda_data():
    """Run the Funda scraper for the hard-coded search and return its result.

    The scraper is configured with a fixed search path, ``find_past=False``
    and ``n_pages=81``. Whatever ``FundaScraper.run()`` returns is passed
    back unchanged; the ``__main__`` section pickles it and hands it to
    ``generate_json``.
    """
    search_path = "nijkerk/beschikbaar/100000-400000/woonhuis/tuin/eengezinswoning/landhuis/+30km/"
    return FundaScraper(url=search_path, find_past=False, n_pages=81).run()
|
|
|
|
def get_distances(out_dict, destination_location):
    """Store the route from *destination_location* to every origin in *out_dict*.

    One OSRM route request is made per entry of ``ORIGIN_LOCATIONS``, using
    the coordinate list ``[destination_location, origin]``. The first element
    of the response's ``'routes'`` list is stored under the origin's key.

    Args:
        out_dict: Dictionary that receives one entry per origin; it is
            modified in place.
        destination_location: Coordinates of the destination, passed to OSRM
            as the first of the two route coordinates.

    Returns:
        None.
    """
    for origin_name, origin_point in ORIGIN_LOCATIONS.items():
        response = osrm_c.route(coordinates=[destination_location, origin_point])
        out_dict[origin_name] = response['routes'][0]
|
|
|
|
def generate_json(houses):
    """Geocode every listing, attach route data and write everything to ``out.json``.

    For each row index ``i`` in ``range(len(houses))``:

    * the row is skipped when its ``zip_code`` is ``"na"`` or does not contain
      at least two space-separated parts;
    * the first two parts of the zip code are geocoded with ``nomi_c``; rows
      for which the geocoder returns nothing are reported on stdout and
      skipped;
    * a dict is built with ``name``, ``position`` (``[longitude, latitude]``),
      every column of ``houses`` for that row, and the routes added by
      ``get_distances``; the dict is appended to the module-level ``out``.

    After the loop the whole ``out`` list is serialised as indented JSON and
    written to ``out.json`` in the current working directory.

    Args:
        houses: Table of listings with a ``zip_code`` column, supporting
            ``len()``, ``.keys()`` and ``houses[key].get(i)``.
            NOTE(review): presumably the pandas DataFrame returned by
            ``get_funda_data`` with a default 0..n-1 index -- confirm.

    Returns:
        None.
    """
    for i in tqdm.tqdm(range(len(houses))):
        zip_code = houses.zip_code.get(i)
        if zip_code == "na":
            continue

        # TODO: filter. The zip code field has extra text glued on, e.g.
        #   3845 HarderwijkMuziekland
        #   8245 LelystadWarande
        #   3862 NijkerkColtoflaan/van
        address_parts = f"{zip_code}".split(" ")
        if len(address_parts) < 2:
            # Without this guard a value with no space (including a missing
            # value formatted as "None") would raise IndexError below.
            print(f"{i}:Unexpected zip code format: {zip_code!r}")
            continue
        address = f"{address_parts[0]} {address_parts[1]}"

        res = nomi_c.geocode(address)
        if res is None:
            print(f"{i}:Failed to get any loction for: {address}")
            continue

        point = res.point
        destination_location = [point.longitude, point.latitude]

        out_dict = {
            # The 1-based row number keeps names unique when several listings
            # resolve to the same address.
            'name': f"{address}_{i + 1}",
            'position': destination_location,
        }
        # Copy every column value of this row; item access replaces the
        # original direct ``houses.__getattr__(key)`` dunder call.
        for key in houses.keys():
            out_dict[key] = houses[key].get(i)

        get_distances(out_dict, destination_location)

        out.append(out_dict)

    # Serialise before opening the file so a serialisation error does not
    # truncate an existing out.json; the context manager closes the handle.
    final = json.dumps(out, indent=2)
    with open('out.json', 'w', encoding='utf-8') as out_f:
        out_f.write(final)
|
|
|
|
if __name__ == "__main__":
    # Reuse a previously pickled scrape result when one exists; otherwise
    # scrape now and cache the result for the next run.
    if os.path.exists('panda_dump.bin'):
        # NOTE: unpickling executes code embedded in the file. This is only
        # acceptable because panda_dump.bin is written by this script itself;
        # never point this at a file from an untrusted source.
        with open('panda_dump.bin', 'rb') as dump_f:
            data = pickle.load(dump_f)
    else:
        data = get_funda_data()
        # The context manager guarantees the cache is flushed and closed.
        with open('panda_dump.bin', 'wb') as dump_f:
            pickle.dump(data, dump_f)
    generate_json(data)