Network Distance with LION map (weight)

We will attempt to use the networkx library with the LION map to calculate the network distance for people walking between two points. What I learned: (1) a weight needs to be specified in order to calculate the right shortest_path; (2) edges between nodes do not have a physical distance unless you specify the attribute in the edges.

[1]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from shapely.geometry import box, LineString, Point,MultiPoint
import os
import sys
import fiona
import numpy as np

# Add the parent directory to the module search path.
sys.path.append(os.path.realpath('..'))
[2]:
# Record the fiona version this notebook was run with.
print(fiona.__version__)
1.8.4
[3]:
# Record the networkx version this notebook was run with.
print(nx.__version__)
2.3

Load Lion

[4]:
# Relative path to the LION shapefile.
shp_file = r"../intermediate_data/lion_gdf2012/lion_gdf2012.shp"

# The same shapefile is loaded twice: as a GeoDataFrame (used later for the
# spatial join and plotting) and as a networkx graph (used for routing).
lion_shp = gpd.read_file(shp_file)
# simplify=False: per the networkx docs, every vertex of a line becomes a node
# instead of only the line's first and last points.
# NOTE(review): nx.read_shp was removed in networkx 3.0; this notebook ran on 2.3.
lion_graph = nx.read_shp(shp_file, simplify=False)
[5]:
# Route on an undirected version of the graph.
G = lion_graph.to_undirected()
# Remember the original node labels before they are replaced by integers;
# they are turned into Point geometries further down.
nodeslist = list(G.nodes)
# Relabel the nodes as consecutive integers starting at 0.
G = nx.convert_node_labels_to_integers(G, first_label = 0)
# NOTE(review): nodesnumlist is later zipped with nodeslist, which assumes the
# relabelling keeps the node iteration order (networkx's default ordering) — confirm.
nodesnumlist = list(G.nodes)

Create a nodes geopandas dataframe so that we can extract the geometries later

[6]:
# Pair each integer node id with the original node label it replaced. The
# label sits in the 'geometry' column so it can be converted to a Point later.
node_df = pd.DataFrame(
    {'node_id': nodesnumlist, 'geometry': nodeslist},
    columns=['node_id', 'geometry'],
)
[7]:
# Turn each stored node label into a shapely Point.
node_df['geometry'] = node_df['geometry'].apply(Point)
[8]:
# Promote the frame to a GeoDataFrame, using the Point column as its geometry.
node_gdf = gpd.GeoDataFrame(node_df, geometry='geometry')
# Label the coordinates as EPSG:4326; assigning .crs only tags the data, it
# does not reproject it.
node_gdf.crs = {'init': 'epsg:4326'}

Examine the attributes of the edges. Locate the variable that gives the distance of the edge

[9]:
# Show the first edge as (u, v, attribute dict) to see which attributes exist.
list(G.edges(data=True))[0]
[9]:
(0,
 1,
 {'Street': 'EAST 168 STREET',
  'SAFStreetN': None,
  'FeatureTyp': '0',
  'SegmentTyp': 'U',
  'IncExFlag': None,
  'RB_Layer': 'B',
  'NonPed': None,
  'TrafDir': 'T',
  'TrafSrc': 'DOT',
  'SpecAddr': None,
  'FaceCode': '2510',
  'SeqNum': '03070',
  'StreetCode': '226700',
  'SAFStreetC': None,
  'LGC1': '01',
  'LGC2': None,
  'LGC3': None,
  'LGC4': None,
  'LGC5': None,
  'LGC6': None,
  'LGC7': None,
  'LGC8': None,
  'LGC9': None,
  'BOE_LGC': '1',
  'SegmentID': '0078126',
  'SegCount': '1',
  'LocStatus': 'X',
  'LZip': '10456',
  'RZip': '10456',
  'LBoro': 2.0,
  'RBoro': 2.0,
  'L_CD': '203',
  'R_CD': '203',
  'LATOMICPOL': '401',
  'RATOMICPOL': '101',
  'LCT2010': '149',
  'LCT2010Suf': None,
  'RCT2010': '185',
  'RCT2010Suf': None,
  'LCB2010': '3001',
  'LCB2010Suf': None,
  'RCB2010': '2000',
  'RCB2010Suf': None,
  'LCT2000': '149',
  'LCT2000Suf': None,
  'RCT2000': '137',
  'RCT2000Suf': None,
  'LCB2000': '4000',
  'LCB2000Suf': None,
  'RCB2000': '1000',
  'RCB2000Suf': None,
  'LCT1990': '149',
  'LCT1990Suf': None,
  'RCT1990': '137',
  'RCT1990Suf': None,
  'LAssmDist': '79',
  'LElectDist': '047',
  'RAssmDist': '79',
  'RElectDist': '050',
  'SplitElect': None,
  'LSchlDist': '09',
  'RSchlDist': '09',
  'SplitSchl': None,
  'LSubSect': '1B',
  'RSubSect': '1B',
  'SanDistInd': None,
  'MapFrom': '3D',
  'MapTo': '3D',
  'BoroBndry': None,
  'MH_RI_Flag': None,
  'XFrom': 1010964,
  'YFrom': 241812,
  'XTo': 1011304,
  'YTo': 241537,
  'ArcCenterX': 0,
  'ArcCenterY': 0,
  'CurveFlag': None,
  'Radius': 0,
  'NodeIDFrom': '0047740',
  'NodeIDTo': '0047827',
  'NodeLevelF': 'M',
  'NodeLevelT': 'M',
  'ConParity': None,
  'Twisted': None,
  'RW_TYPE': '1',
  'PhysicalID': 35231.0,
  'GenericID': 30694.0,
  'NYPDID': None,
  'FDNYID': None,
  'LBlockFace': None,
  'RBlockFace': None,
  'LegacyID': '0078126',
  'Status': '2',
  'StreetWidt': None,
  'StreetWi_1': None,
  'BikeLane': None,
  'FCC': None,
  'ROW_Type': None,
  'LLo_Hyphen': '599',
  'LHi_Hyphen': '699',
  'RLo_Hyphen': '596',
  'RHi_Hyphen': '716',
  'FromLeft': 599,
  'ToLeft': 699,
  'FromRight': 596,
  'ToRight': 716,
  'Join_ID': '2251001000000',
  'SHAPE_Leng': 437.2820298731641,
  'ShpName': 'lion_gdf2012',
  'Wkb': b'\x00\x00\x00\x00\x02\x00\x00\x00\x02\xc0Ry\xd2f\xa8\xb8=@DjI\x08\x7f\xf2a\xc0Ry\xbeI\xc4\xba\x9b@Dj0EZ\x1a6',
  'Wkt': 'LINESTRING (-73.90346685864 40.8303537964573,-73.9022392674619 40.8295981111482)',
  'Json': '{ "type": "LineString", "coordinates": [ [ -73.903466858640016, 40.83035379645731 ], [ -73.902239267461866, 40.829598111148172 ] ] }'})
[10]:
# Number of non-null values in each column of the node table.
node_gdf.count()
[10]:
node_id     174251
geometry    174251
dtype: int64
[11]:
# Tag both layers as EPSG:4326 so the later spatial join sees matching CRSs.
# NOTE(review): assignment overwrites whatever CRS was read from the shapefile
# without reprojecting — confirm the LION geometries really are lon/lat.
lion_shp.crs = {'init': 'epsg:4326'}
# node_gdf.crs was already assigned when node_gdf was created; this repeats it.
node_gdf.crs = {'init': 'epsg:4326'}

Network analysis!

[12]:
# Integer node ids (labels in G) of the route's start and end points.
orig_node = 30502
target_node = 42603

To calculate shortest path network distance, we have to specify the weight. Here weight is defined as SHAPE_Leng - the length provided by LION.

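If we do not specify the weight, networkx treats every edge as having length 1 and returns the path with the fewest edges, so we might get a physically longer path.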

[13]:
# weight=None: every edge counts as 1, so this is the path with the fewest
# edges, not necessarily the shortest in physical length.
route = nx.shortest_path(G=G, source=orig_node, target=target_node, weight = None)
[14]:
# Look up the route's nodes by index label, in route order.
# NOTE(review): relies on node_gdf's index labels matching the node ids — confirm.
route_nodes = node_gdf.loc[route]
# Connect the node points in order to form the route geometry.
route_line = LineString(list(route_nodes.geometry.values))

Make a geodataframe to store the data

[15]:
# Empty GeoDataFrame sharing node_gdf's CRS; the two columns are created up
# front so rows can be filled in by label afterwards.
route_geom = gpd.GeoDataFrame(crs=node_gdf.crs)
route_geom['geometry'] = None
route_geom['id'] = None

Add the information into the geodataframe

[16]:
# Row 0: the current route (computed with weight=None).
route_geom.loc[0, 'geometry'] = route_line
# Store the visited node ids as one string.
route_geom.loc[0, 'id'] = str(list(route_nodes['node_id'].values))

Specifying the weight as the length of the edge

[17]:
# Same origin and target, but edges are now weighted by their 'SHAPE_Leng'
# attribute, so the path minimises total length.
route = nx.shortest_path(G=G, source=orig_node, target=target_node, weight = 'SHAPE_Leng')

Capture the route nodes and turn them into a LineString

[18]:
# Look up the weighted route's nodes by index label, in route order.
# NOTE(review): relies on node_gdf's index labels matching the node ids — confirm.
route_nodes = node_gdf.loc[route]
# Connect the node points in order to form the route geometry.
route_line = LineString(list(route_nodes.geometry.values))

Add the information into the geodataframe

[19]:
# Row 1: the current route (computed with weight='SHAPE_Leng').
route_geom.loc[1, 'geometry'] = route_line
# Store the visited node ids as one string.
route_geom.loc[1, 'id'] = str(list(route_nodes['node_id'].values))
[20]:
# Preview the two stored routes.
route_geom.head()
[20]:
geometry id
0 LINESTRING (-73.95950539213287 40.618059451023... [30502, 41483, 35956, 35955, 30410, 30350, 304...
1 LINESTRING (-73.95950539213287 40.618059451023... [30502, 41483, 35956, 35962, 35958, 30463, 465...
[21]:
# Plot row 0 on its own (the route computed with weight=None).
route_geom.loc[[0],'geometry'].plot();
_images/networkdistance_lion-issues_32_0.png
[22]:
# Plot row 1 on its own (the route computed with weight='SHAPE_Leng').
route_geom.loc[[1],'geometry'].plot();
_images/networkdistance_lion-issues_33_0.png

Plotting the LION edges around the routes, selected using the bounding box (envelope) of route_geom

[23]:
# Bounding rectangle (envelope) of each route geometry.
bounding_box = route_geom.envelope
# Wrap the envelopes in their own GeoDataFrame so they can be spatially joined.
df = gpd.GeoDataFrame(gpd.GeoSeries(bounding_box), columns=['geometry'])
[24]:
# Marker column: after the join, rows carrying bbox == 1 matched an envelope.
df['bbox'] = 1
# Tag the envelopes with the same CRS assigned to lion_shp.
df.crs = {'init': 'epsg:4326'}
[25]:
# Left spatial join: every LION segment is kept, and segments that match an
# envelope (by sjoin's default predicate) pick up the bbox marker.
join_left_df = gpd.sjoin(lion_shp, df, how="left")
[26]:
# Plot only the LION segments that matched a route envelope.
join_left_df.loc[join_left_df['bbox'] == 1].plot(figsize = (10,10))
[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x28fb2e8fe10>
_images/networkdistance_lion-issues_38_1.png

Overlaying the route on top of the edge lines

[27]:
# Background layer: the LION segments that matched a route envelope.
in_bbox = join_left_df['bbox'] == 1
base = join_left_df.loc[in_bbox].plot(color = 'lightblue', figsize = (10,10))
# Draw both stored routes on the same axes: row 0 in green, row 1 in red.
for route_row, route_colour in ((0, 'green'), (1, 'red')):
    route_geom.loc[[route_row],'geometry'].plot(ax=base, color=route_colour);
_images/networkdistance_lion-issues_40_0.png

The green route was selected by networkx when no weight was specified. The red route was selected by networkx with the weight specified as the edge length.