usse/scrape/usse_gmaps.py
2023-12-22 15:26:01 +01:00

91 lines
3.3 KiB
Python

from funda_scraper import FundaScraper
import googlemaps
import datetime, os, pickle, json
import tqdm
# Google Maps client shared by every geocoding / distance-matrix call below.
# The key is read from the GOOGLE_MAPS_API_KEY environment variable when set.
# SECURITY: the literal fallback is a credential committed to source control;
# it is kept only so existing runs keep working. Rotate the key and remove
# the fallback once the environment variable is configured everywhere.
gmaps = googlemaps.Client(
    key=os.environ.get(
        'GOOGLE_MAPS_API_KEY',
        'AIzaSyC59N9i6Akvx6T2EIU2NfzqYacNDKVm1JE',
    )
)
# Geocoding an address
# geocode_result = gmaps.geocode('1600 Amphitheatre Parkway, Mountain View, CA')
# Look up an address with reverse geocoding
# reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))
# # Request directions via public transit
# # now = datetime.now()
# # directions_result = gmaps.directions("Sydney Town Hall",
# # "Parramatta, NSW",
# # mode="transit",
# # departure_time=now)
# # Validate an address with address validation
# addressvalidation_result = gmaps.addressvalidation(['1600 Amphitheatre Pk'],
# regionCode='US',
# locality='Mountain View',
# enableUspsCass=True)
def get_funda_data():
    """Run the Funda scraper for the fixed search and return its result.

    The search URL, ``find_past=False`` and ``n_pages=81`` are hard-coded;
    whatever ``FundaScraper.run()`` returns is handed back unchanged.
    """
    search_url = "nijkerk/beschikbaar/100000-400000/woonhuis/tuin/eengezinswoning/landhuis/+30km/"
    return FundaScraper(url=search_url, find_past=False, n_pages=81).run()
# Module-level accumulator: generate_json() appends one dict per house to
# this list and serialises the whole list at the end.
out = []
# Sample record kept for reference (not used at runtime):
# {
# "name": "Landmarc",
# "address": "10 Columbus Cir #3, New York, NY 10019, USA",
# "url": "http://landmarc-restaurant.com/",
# "position": {"lat": 40.7685566,"lng": -73.985375317}
# }
# Fixed origin points, keyed by name. Each value is a coordinate pair that
# get_distances() passes as the origin to gmaps.distance_matrix
# (presumably (latitude, longitude) -- confirm against the Maps client docs).
origin_locations = dict(
    nfi_location=(52.044867266861466, 4.3585175985355225),
    hoogstraat_location=(52.08725689123654, 5.147180442716177),
    bakkersdijk_location=(51.85802695253161, 4.482033956202426),
    korhoen_location=(52.5219455005375, 5.732514040876346),
    harde_location=(52.41650138296019, 5.870995170999243),
)
def get_distances(out_dict, destination_location):
    """Store the driving distance-matrix element from every origin in *out_dict*.

    One ``gmaps.distance_matrix`` request is made per entry of the
    module-level ``origin_locations`` towards *destination_location*. The
    first element of the first row of each response is saved under the
    origin's key. *out_dict* is modified in place; nothing is returned.
    """
    for origin_name, origin_coords in origin_locations.items():
        response = gmaps.distance_matrix(origin_coords, destination_location, mode='driving')
        first_row = response['rows'][0]
        out_dict[origin_name] = first_row['elements'][0]
def generate_json(houses):
    """Geocode every house, attach drive distances, and write ``out.json``.

    For each row of *houses* the address and zip code are geocoded. The
    first geocoding hit becomes the record's ``position``, every column of
    the row is copied into the record, and ``get_distances`` adds one entry
    per origin. Rows with no geocoding result are reported and skipped.

    Records are appended to the module-level ``out`` list, so calling this
    function more than once in a process accumulates records. The whole
    list is then dumped as indented JSON to ``out.json`` in the current
    working directory.

    Args:
        houses: table of listings exposing ``address`` and ``zip_code``
            columns (a pandas DataFrame in the script's own usage).
    """
    # One geocode request plus one distance-matrix request per origin.
    requests_per_house = 1 + len(origin_locations)
    print(f"Doing {len(houses) * requests_per_house} requests")
    for i in tqdm.tqdm(range(len(houses))):
        out_dict = {}
        # Positional access: a label lookup would silently produce None for
        # any frame whose index is not exactly 0..n-1.
        address = f"{houses.address.iloc[i]}, {houses.zip_code.iloc[i]}"
        res = gmaps.geocode(address)
        if not res:
            print(f"{i}:Failed to get any loction for: {address}")
            continue
        if len(res) > 1:
            # Ambiguous match: warn, then fall through and use the first hit.
            print(f"{i}:Failed to get location for: {address}, assuming 0")
        destination_location = res[0]['geometry']['location']
        out_dict['name'] = address
        out_dict['position'] = destination_location
        # Copy every column of the row; a column named like one of the keys
        # set above overwrites it, as before.
        for key in houses.keys():
            out_dict[key] = houses[key].iloc[i]
        get_distances(out_dict, destination_location)
        out.append(out_dict)
    final = json.dumps(out, indent=2)
    # Context manager guarantees the file is flushed and closed.
    with open('out.json', 'w') as out_f:
        out_f.write(final)
if __name__ == "__main__":
    # Reuse the cached scrape when it exists; otherwise scrape and cache it.
    if os.path.exists('panda_dump.bin'):
        # NOTE: unpickling executes code embedded in the file. This is only
        # acceptable because the cache is written by this script itself;
        # never point it at a file from an untrusted source.
        with open('panda_dump.bin', 'rb') as cache_f:
            data = pickle.load(cache_f)
    else:
        data = get_funda_data()
        with open('panda_dump.bin', 'wb') as cache_f:
            pickle.dump(data, cache_f)
    generate_json(data)