91 lines
3.3 KiB
Python
91 lines
3.3 KiB
Python
from funda_scraper import FundaScraper
|
|
import googlemaps
|
|
import datetime, os, pickle, json
|
|
import tqdm
|
|
|
|
# Google Maps client shared by every geocoding and distance-matrix call in
# this script. The key is read from the GOOGLE_MAPS_API_KEY environment
# variable when that is set to a non-empty value.
# NOTE(security): the literal fallback below is a credential committed to
# source control. It is kept only so existing runs keep working; rotate the
# key and remove the fallback once the environment variable is configured.
gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY')
    or 'AIzaSyC59N9i6Akvx6T2EIU2NfzqYacNDKVm1JE'
)
|
|
|
|
# Geocoding an address
|
|
# geocode_result = gmaps.geocode('1600 Amphitheatre Parkway, Mountain View, CA')
|
|
|
|
# Look up an address with reverse geocoding
|
|
# reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))
|
|
|
|
# # Request directions via public transit
|
|
# # now = datetime.now()
|
|
# # directions_result = gmaps.directions("Sydney Town Hall",
|
|
# # "Parramatta, NSW",
|
|
# # mode="transit",
|
|
# # departure_time=now)
|
|
|
|
# # Validate an address with address validation
|
|
# addressvalidation_result = gmaps.addressvalidation(['1600 Amphitheatre Pk'],
|
|
# regionCode='US',
|
|
# locality='Mountain View',
|
|
# enableUspsCass=True)
|
|
|
|
def get_funda_data(
    url="nijkerk/beschikbaar/100000-400000/woonhuis/tuin/eengezinswoning/landhuis/+30km/",
    find_past=False,
    n_pages=81,
):
    """Run a Funda scrape and return its result.

    The defaults reproduce the search that used to be hard-coded here, so
    calling ``get_funda_data()`` with no arguments behaves as before.

    Args:
        url: Search path handed to ``FundaScraper`` as ``url``.
        find_past: Forwarded to ``FundaScraper`` as ``find_past``.
        n_pages: Forwarded to ``FundaScraper`` as ``n_pages``.

    Returns:
        Whatever ``FundaScraper.run()`` returns (presumably a pandas
        DataFrame, judging by how the caller uses it -- TODO confirm).
    """
    scraper = FundaScraper(url=url, find_past=find_past, n_pages=n_pages)
    return scraper.run()
|
|
|
|
# Module-level accumulator: generate_json() appends one record per house here
# and later serialises the whole list.
out = list()

# Example of a record shape, kept for reference:
#
#   {
#       "name": "Landmarc",
#       "address": "10 Columbus Cir #3, New York, NY 10019, USA",
#       "url": "http://landmarc-restaurant.com/",
#       "position": {"lat": 40.7685566,"lng": -73.985375317}
#   }

# Fixed origins that every house is measured against. Each value is passed to
# gmaps.distance_matrix() as the origin (presumably (latitude, longitude)
# pairs -- TODO confirm).
origin_locations = dict(
    nfi_location=(52.044867266861466, 4.3585175985355225),
    hoogstraat_location=(52.08725689123654, 5.147180442716177),
    bakkersdijk_location=(51.85802695253161, 4.482033956202426),
    korhoen_location=(52.5219455005375, 5.732514040876346),
    harde_location=(52.41650138296019, 5.870995170999243),
)
|
|
|
|
def get_distances(out_dict, destination_location):
    """Add one driving distance-matrix result per origin to ``out_dict``.

    For every entry in the module-level ``origin_locations`` mapping, a
    ``gmaps.distance_matrix`` request is made in ``driving`` mode and the
    first element of the first row of the response is stored in ``out_dict``
    under the origin's key.

    Args:
        out_dict: Dictionary updated in place.
        destination_location: Destination passed to ``distance_matrix``.

    Returns:
        None; the result is delivered by mutating ``out_dict``.
    """
    for origin_name, origin_coords in origin_locations.items():
        response = gmaps.distance_matrix(
            origin_coords, destination_location, mode='driving'
        )
        first_row = response['rows'][0]
        out_dict[origin_name] = first_row['elements'][0]
|
|
|
|
def generate_json(houses):
    """Geocode every house, attach driving distances, and write ``out.json``.

    For each row the address and zip code are joined into one query string
    and geocoded. Rows with no geocoding result are reported and skipped;
    when several results come back the first one is used. Every successful
    row becomes a dictionary holding the query string (``name``), the
    geocoded ``position``, all columns of ``houses`` for that row, and one
    distance-matrix element per entry in ``origin_locations``.

    The records are appended to the module-level ``out`` list, which is then
    dumped as indented JSON to ``out.json`` in the current directory.

    Args:
        houses: Table-like object exposing ``address`` and ``zip_code``
            columns, ``keys()``, ``len()`` and column access by key
            (presumably a pandas DataFrame -- TODO confirm).
    """
    # One geocode request plus one distance-matrix request per origin.
    requests_per_house = len(origin_locations) + 1
    print(f"Doing {len(houses) * requests_per_house} requests")

    # Rows are looked up by the label ``i`` via ``.get(i)``, so this assumes
    # the index labels run 0..len(houses)-1 -- TODO confirm against the
    # scraper output.
    for i in tqdm.tqdm(range(len(houses))):
        address = f"{houses.address.get(i)}, {houses.zip_code.get(i)}"
        res = gmaps.geocode(address)
        if not res:
            print(f"{i}:Failed to get any loction for: {address}")
            continue
        if len(res) > 1:
            # Ambiguous match: report it, then fall through and use res[0].
            print(f"{i}:Failed to get location for: {address}, assuming 0")
        destination_location = res[0]['geometry']['location']

        out_dict = {'name': address, 'position': destination_location}
        # Column values are copied after name/position, so a column with
        # either of those names overrides the value set above.
        for key in houses.keys():
            out_dict[key] = houses[key].get(i)

        get_distances(out_dict, destination_location)
        out.append(out_dict)

    # The context manager guarantees the file is flushed and closed.
    with open('out.json', 'w') as out_f:
        out_f.write(json.dumps(out, indent=2))
|
|
|
|
|
|
if __name__ == "__main__":
    # Reuse the cached scrape when it exists; otherwise scrape once and cache
    # the result so later runs skip the scraping step.
    if os.path.exists('panda_dump.bin'):
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only load a dump that this script wrote itself.
        with open('panda_dump.bin', 'rb') as dump_f:
            data = pickle.load(dump_f)
    else:
        data = get_funda_data()
        with open('panda_dump.bin', 'wb') as dump_f:
            pickle.dump(data, dump_f)
    generate_json(data)