Extracting LION from BYTES of the BIG APPLE

[2]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from shapely.geometry import box, LineString, Point,MultiPoint
import os
import sys
import requests
from zipfile import ZipFile as zzip
import fiona
from scipy.spatial import cKDTree
import numpy as np
import time

# Add the parent of the current working directory to the module search path.
sys.path.append(os.path.realpath('..'))

Downloading the LION - File Geodatabase

[3]:
# Base URL that each LION archive name (plus ".zip") is appended to when downloading.
path = r"https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/"
[5]:
# Names of the LION release archives to download; each lives at `path + name + ".zip"`.
t = ("nyc_lion16a", "nyc_lion15a", "nyc_lion14aav", "nyc_lion13a","nyc_lion12aav","nyc_lion11aav","nyc_lion10aav","nyc_lion09ashp")

# Make sure the download folder exists before any file is written into it.
os.makedirs("../input_data", exist_ok=True)

for archive_name in t:
    # Fetch the archive in binary form; the timeout keeps a stalled
    # connection from hanging the cell indefinitely.
    r = requests.get(path + archive_name + ".zip", timeout=60)
    # Stop on an HTTP error instead of saving the server's error page as a
    # ".zip" that would only fail later when it is opened.
    r.raise_for_status()
    # Write the downloaded bytes to ../input_data/<name>.zip
    with open("../input_data/" + archive_name + ".zip", "wb") as file:
        file.write(r.content)
[6]:
# Names of the downloaded LION archives; each is expected at ../input_data/<name>.zip
t = ("nyc_lion16a", "nyc_lion15a", "nyc_lion14aav", "nyc_lion13a","nyc_lion12aav","nyc_lion11aav","nyc_lion10aav","nyc_lion09ashp")

for archive_name in t:
    fp = "../input_data/" + archive_name + ".zip"
    # Deliberately not called `path`: that name holds the download base URL,
    # and rebinding it here would break a re-run of the download cell.
    extract_dir = "../input_data/" + archive_name

    # exist_ok lets the cell be re-run after a previous (possibly partial)
    # extraction instead of failing with FileExistsError.
    os.makedirs(extract_dir, exist_ok=True)

    # Open the zip file in read mode and extract all of its members.
    with zzip(fp, 'r') as file:
        file.extractall(extract_dir)
    print('Done!')
Done!
Done!
Done!

Extracting the lion layer

  • The LION geodatabase for 2010 could not be opened properly

  • see lion_gdf = gpd.read_file("../input_data/nyc_lion10aav/lion/lion.gdb", driver='OpenFileGDB', layer='lion')

  • Instead, the 2010 LION layer used here was extracted by a colleague who opened the geodatabase with ArcGIS

[11]:
# [output dataset name, source path] pairs for the 2009-2011 LION releases.
gdb_paths = [
    ["lion_gdf2009","../input_data/nyc_lion09ashp/lion/lion.shp"],
    ["lion_gdf2010", "../input_data/nyc_lion10aav/lion_erilia/lion_generic_2010.shp"],
    ["lion_gdf2011","../input_data/nyc_lion11aav/lion/lion.gdb"]
]

for out_name, src_path in gdb_paths:
    print("Reading in file: " + src_path)
    try:
        # First attempt: read the 'lion' layer with the OpenFileGDB driver and
        # reproject it to EPSG:4326.
        lion_native = gpd.read_file(src_path, driver='OpenFileGDB', layer='lion')
        lion_gdf = lion_native.to_crs(epsg=4326)
    except Exception as err:
        # Fallback: let the reader pick the driver and keep the file's own CRS.
        # The failure is reported rather than silently swallowed.
        print("OpenFileGDB read/reprojection failed (" + str(err) + "); reading with the default driver")
        lion_native = gpd.read_file(src_path)
        lion_gdf = lion_native

    # Segment lengths are measured in the source CRS, before any reprojection:
    # lengths taken after converting to EPSG:4326 would be in degrees and would
    # not be comparable with files that went through the fallback branch.
    native_length = lion_native['geometry'].length

    print("Removing the non pedestrian accessible roads/paths")
    # Rows to discard: NonPed equal to 'V', FeatureTyp in the listed codes,
    # or TrafDir equal to a single blank.
    drop_mask = (
        (lion_gdf['NonPed'] == 'V')
        | lion_gdf['FeatureTyp'].isin(['F','9','1','7','3'])
        | lion_gdf['TrafDir'].isin([' '])
    )

    print("Creating dataframe: " + out_name)
    # Copy so that adding a column does not write into a slice of lion_gdf.
    clean_lion_gdf = lion_gdf.loc[~drop_mask].copy()
    clean_lion_gdf['distance'] = native_length.loc[~drop_mask]

    print("Creating new folder -" + out_name + "- in intermediate_data")
    # Deliberately not called `path`: that name holds the download base URL.
    out_dir = "../intermediate_data/" + out_name
    if os.path.isdir(out_dir):
        print("Folder already made...")
    else:
        # makedirs also creates ../intermediate_data itself when it is missing.
        os.makedirs(out_dir)

    # Save the cleaned layer as a shapefile inside its own folder.
    fp = out_dir + "/" + out_name + ".shp"
    print("Writing data to "+ fp)
    clean_lion_gdf.to_file(fp)
    print("Done!")
Reading in file: ../input_data/nyc_lion09ashp/lion/lion.shp
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2009
Creating new folder -lion_gdf2009- in intermediate_data
Folder already made...
Writing data to ../intermediate_data/lion_gdf2009/lion_gdf2009.shp
Done!
Reading in file: ../input_data/nyc_lion10aav/lion_erilia/lion_generic_2010.shp
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2010
Creating new folder -lion_gdf2010- in intermediate_data
Folder already made...
Writing data to ../intermediate_data/lion_gdf2010/lion_gdf2010.shp
Done!
Reading in file: ../input_data/nyc_lion11aav/lion/lion.gdb
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2011
Creating new folder -lion_gdf2011- in intermediate_data
Folder already made...
Writing data to ../intermediate_data/lion_gdf2011/lion_gdf2011.shp
Done!
[7]:
# [output dataset name, source path] pairs for the 2012-2016 LION releases.
gdb_paths = [
    ["lion_gdf2012","../input_data/nyc_lion12aav/lion/lion.gdb"],
    ["lion_gdf2013","../input_data/nyc_lion13a/lion/lion.gdb"],
    ["lion_gdf2014","../input_data/nyc_lion14aav/lion/lion.gdb"],
    ["lion_gdf2015","../input_data/nyc_lion15a/lion.gdb"],
    ["lion_gdf2016","../input_data/nyc_lion16a/lion/lion.gdb"]
]

for out_name, src_path in gdb_paths:
    print("Reading in file: " + src_path)
    try:
        # First attempt: read the 'lion' layer with the FileGDB driver and
        # reproject it to EPSG:4326.
        lion_native = gpd.read_file(src_path, driver='FileGDB', layer='lion')
        lion_gdf = lion_native.to_crs(epsg=4326)
    except Exception as err:
        # Fallback: let the reader pick the driver and keep the file's own CRS.
        # The failure is reported rather than silently swallowed.
        print("FileGDB read/reprojection failed (" + str(err) + "); reading with the default driver")
        lion_native = gpd.read_file(src_path)
        lion_gdf = lion_native

    # Segment lengths are measured in the source CRS, before any reprojection:
    # lengths taken after converting to EPSG:4326 would be in degrees and would
    # not be comparable with files that went through the fallback branch.
    native_length = lion_native['geometry'].length

    print("Removing the non pedestrian accessible roads/paths")
    # Rows to discard: NonPed equal to 'V', FeatureTyp in the listed codes,
    # or TrafDir equal to a single blank.
    drop_mask = (
        (lion_gdf['NonPed'] == 'V')
        | lion_gdf['FeatureTyp'].isin(['F','9','1','7','3'])
        | lion_gdf['TrafDir'].isin([' '])
    )

    print("Creating dataframe: " + out_name)
    # Copy so that adding a column does not write into a slice of lion_gdf.
    clean_lion_gdf = lion_gdf.loc[~drop_mask].copy()
    clean_lion_gdf['distance'] = native_length.loc[~drop_mask]

    print("Creating new folder -" + out_name + "- in intermediate_data")
    # Deliberately not called `path`: that name holds the download base URL.
    out_dir = "../intermediate_data/" + out_name
    if os.path.isdir(out_dir):
        print("Folder already made...")
    else:
        # makedirs also creates ../intermediate_data itself when it is missing.
        os.makedirs(out_dir)

    # Save the cleaned layer as a shapefile inside its own folder.
    fp = out_dir + "/" + out_name + ".shp"
    print("Writing data to "+ fp)
    clean_lion_gdf.to_file(fp)
    print("Done!")
Reading in file: ../input_data/nyc_lion12aav/lion/lion.gdb
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2012
Creating new folder -lion_gdf2012- in intermediate_data
Writing data to
Writing data to ../intermediate_data/lion_gdf2012/lion_gdf2012.shp
Done!
Reading in file: ../input_data/nyc_lion13a/lion/lion.gdb
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2013
Creating new folder -lion_gdf2013- in intermediate_data
Writing data to
Writing data to ../intermediate_data/lion_gdf2013/lion_gdf2013.shp
Done!
Reading in file: ../input_data/nyc_lion14aav/lion/lion.gdb
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2014
Creating new folder -lion_gdf2014- in intermediate_data
Writing data to
Writing data to ../intermediate_data/lion_gdf2014/lion_gdf2014.shp
Done!
Reading in file: ../input_data/nyc_lion15a/lion.gdb
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2015
Creating new folder -lion_gdf2015- in intermediate_data
Writing data to
Writing data to ../intermediate_data/lion_gdf2015/lion_gdf2015.shp
Done!
Reading in file: ../input_data/nyc_lion16a/lion/lion.gdb
Removing the non pedestrian accessible roads/paths
Creating dataframe: lion_gdf2016
Creating new folder -lion_gdf2016- in intermediate_data
Writing data to
Writing data to ../intermediate_data/lion_gdf2016/lion_gdf2016.shp
Done!