"""Scrape Funda listings and annotate each one with driving distances.

Pipeline, as implemented below:

1. Fetch listings with ``FundaScraper`` (cached on disk in ``panda_dump.bin``).
2. Geocode every listing address with the Google Maps Geocoding API.
3. Ask the Distance Matrix API for the driving distance from each entry of
   ``origin_locations`` to the listing.
4. Dump all records to ``out.json``.
"""

import datetime
import json
import os
import pickle

import googlemaps
import tqdm
from funda_scraper import FundaScraper

# NOTE(review): this key is committed to source control and must be treated as
# leaked -- rotate it and provide the replacement through the
# GOOGLE_MAPS_API_KEY environment variable. The literal is kept only as a
# fallback so that existing invocations keep working unchanged.
_FALLBACK_API_KEY = 'AIzaSyC59N9i6Akvx6T2EIU2NfzqYacNDKVm1JE'

gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY', _FALLBACK_API_KEY)
)

# Reference snippets for other googlemaps client calls (not used by this script):
#
# Geocoding an address
# geocode_result = gmaps.geocode('1600 Amphitheatre Parkway, Mountain View, CA')
#
# Look up an address with reverse geocoding
# reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))
#
# Request directions via public transit
# now = datetime.now()
# directions_result = gmaps.directions("Sydney Town Hall",
#                                      "Parramatta, NSW",
#                                      mode="transit",
#                                      departure_time=now)
#
# Validate an address with address validation
# addressvalidation_result = gmaps.addressvalidation(['1600 Amphitheatre Pk'],
#                                                     regionCode='US',
#                                                     locality='Mountain View',
#                                                     enableUspsCass=True)

# On-disk cache of the scraped listings and default location of the result.
CACHE_PATH = 'panda_dump.bin'
OUTPUT_PATH = 'out.json'


def get_funda_data():
    """Run the Funda scraper for the hard-coded search and return its result.

    The search URL, ``find_past=False`` and ``n_pages=81`` are fixed here.

    Returns:
        Whatever ``FundaScraper.run()`` returns -- presumably a pandas
        DataFrame with one row per listing (the rest of this module treats
        it as one); confirm against the funda_scraper documentation.
    """
    scraper = FundaScraper(
        url="nijkerk/beschikbaar/100000-400000/woonhuis/tuin/eengezinswoning/landhuis/+30km/",
        find_past=False,
        n_pages=81,
    )
    df = scraper.run()
    return df


# Accumulates one record per successfully geocoded listing. Every record has
# at least 'name' and 'position' plus one entry per listing column and per
# origin. Example of the basic shape:
#
# {
#     "name": "Landmarc",
#     "address": "10 Columbus Cir #3, New York, NY 10019, USA",
#     "url": "http://landmarc-restaurant.com/",
#     "position": {"lat": 40.7685566, "lng": -73.985375317}
# }
out = []

# Fixed (lat, lng) origins from which the driving distance to every listing
# is requested.
origin_locations = {
    "nfi_location": (52.044867266861466, 4.3585175985355225),
    "hoogstraat_location": (52.08725689123654, 5.147180442716177),
    "bakkersdijk_location": (51.85802695253161, 4.482033956202426),
    "korhoen_location": (52.5219455005375, 5.732514040876346),
    "harde_location": (52.41650138296019, 5.870995170999243),
}


def get_distances(out_dict, destination_location):
    """Add the driving-distance result for every origin to ``out_dict``.

    Issues one Distance Matrix request per entry of ``origin_locations`` and
    stores the first element of the first response row under the origin's
    key. ``out_dict`` is modified in place; nothing is returned.

    Args:
        out_dict: Record to extend with one entry per origin.
        destination_location: Location of the listing, as accepted by
            ``gmaps.distance_matrix`` (the caller passes the ``location``
            mapping taken from a geocoding result).
    """
    for key, origin in origin_locations.items():
        distance = gmaps.distance_matrix(
            origin, destination_location, mode='driving'
        )
        out_dict[key] = distance['rows'][0]['elements'][0]


def _json_default(value):
    """Convert a value ``json`` cannot encode natively into one it can.

    Only invoked by ``json.dumps`` for objects it would otherwise reject, so
    records made of plain Python values are serialized exactly as before.
    """
    # Date/time-like objects are written as ISO-8601 strings.
    if hasattr(value, 'isoformat'):
        return value.isoformat()
    # numpy scalars and arrays expose tolist(), which yields plain Python
    # values; DataFrame cells may be such scalars (e.g. numpy.int64).
    if hasattr(value, 'tolist'):
        return value.tolist()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def generate_json(houses, output_path=OUTPUT_PATH):
    """Geocode every listing, attach driving distances and write the JSON file.

    For each row the address and zip code are geocoded; rows without any
    geocoding match are reported and skipped, rows with several matches use
    the first one. Each resulting record is appended to the module-level
    ``out`` list, and the whole list is written to ``output_path``.

    Args:
        houses: Table of listings with at least ``address`` and ``zip_code``
            columns (expected to be the DataFrame from ``get_funda_data``).
        output_path: File the JSON document is written to.
    """
    # One geocoding request per listing plus one distance request per origin.
    requests_per_house = 1 + len(origin_locations)
    print(f"Doing {len(houses) * requests_per_house} requests")

    for i in tqdm.tqdm(range(len(houses))):
        # Positional access: the loop counter is a position, not an index
        # label, so this also works when the index is not 0..n-1.
        row = houses.iloc[i]
        address = f"{row['address']}, {row['zip_code']}"

        res = gmaps.geocode(address)
        if not res:
            print(f"{i}:Failed to get any loction for: {address}")
            continue
        if len(res) > 1:
            # Ambiguous address: report it and carry on with the first match.
            print(f"{i}:Failed to get location for: {address}, assuming 0")

        destination_location = res[0]['geometry']['location']

        out_dict = {'name': address, 'position': destination_location}
        # Copy every listing column; a column called 'name' or 'position'
        # would override the entries set above.
        for key in houses.keys():
            out_dict[key] = row[key]
        get_distances(out_dict, destination_location)
        out.append(out_dict)

    final = json.dumps(out, indent=2, default=_json_default)
    with open(output_path, 'w', encoding='utf-8') as out_f:
        out_f.write(final)


def main():
    """Load the cached listings (scraping them on first run) and export JSON."""
    if os.path.exists(CACHE_PATH):
        # NOTE(review): unpickling executes arbitrary code from the file.
        # This is acceptable only because the cache is written locally by
        # this script; never point CACHE_PATH at an untrusted file.
        with open(CACHE_PATH, 'rb') as cache_file:
            data = pickle.load(cache_file)
    else:
        data = get_funda_data()
        with open(CACHE_PATH, 'wb') as cache_file:
            pickle.dump(data, cache_file)
    generate_json(data)


if __name__ == "__main__":
    main()