{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Network Distance with LION map (weight)\n", "We will use the networkx library with the LION map to calculate the network distance for people walking between two points.\n", "What I learned:\n", "- a weight needs to be specified for `shortest_path` to return the path that is shortest by distance; without one it minimizes the number of edges\n", "- edges between nodes do not have a physical distance unless you specify the edge attribute that holds it" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "import geopandas as gpd\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import networkx as nx\n", "from shapely.geometry import box, LineString, Point,MultiPoint\n", "import os\n", "import sys\n", "import fiona\n", "import numpy as np\n", "\n", "sys.path.append(os.path.realpath('..'))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.8.4\n" ] } ], "source": [ "print(fiona.__version__)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.3\n" ] } ], "source": [ "print(nx.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Lion" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "shp_file = r\"../intermediate_data/lion_gdf2012/lion_gdf2012.shp\"\n", "\n", "lion_shp = gpd.read_file(shp_file)\n", "lion_graph = nx.read_shp(shp_file, simplify=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "G = lion_graph.to_undirected()\n", "nodeslist = list(G.nodes)\n", "G = nx.convert_node_labels_to_integers(G, first_label = 0)\n", "nodesnumlist = list(G.nodes)" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "Create a nodes geopandas dataframe so that we can extract the geometries later" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "node_df = pd.DataFrame(list(zip(nodesnumlist, nodeslist)), \n", " columns =['node_id', 'geometry'])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "node_df['geometry'] = node_df['geometry'].apply(Point)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "node_gdf = gpd.GeoDataFrame(node_df, geometry='geometry')\n", "node_gdf.crs = {'init': 'epsg:4326'}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Examine the attributes of the edges. Locate the variable that gives the distance of the edge" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0,\n", " 1,\n", " {'Street': 'EAST 168 STREET',\n", " 'SAFStreetN': None,\n", " 'FeatureTyp': '0',\n", " 'SegmentTyp': 'U',\n", " 'IncExFlag': None,\n", " 'RB_Layer': 'B',\n", " 'NonPed': None,\n", " 'TrafDir': 'T',\n", " 'TrafSrc': 'DOT',\n", " 'SpecAddr': None,\n", " 'FaceCode': '2510',\n", " 'SeqNum': '03070',\n", " 'StreetCode': '226700',\n", " 'SAFStreetC': None,\n", " 'LGC1': '01',\n", " 'LGC2': None,\n", " 'LGC3': None,\n", " 'LGC4': None,\n", " 'LGC5': None,\n", " 'LGC6': None,\n", " 'LGC7': None,\n", " 'LGC8': None,\n", " 'LGC9': None,\n", " 'BOE_LGC': '1',\n", " 'SegmentID': '0078126',\n", " 'SegCount': '1',\n", " 'LocStatus': 'X',\n", " 'LZip': '10456',\n", " 'RZip': '10456',\n", " 'LBoro': 2.0,\n", " 'RBoro': 2.0,\n", " 'L_CD': '203',\n", " 'R_CD': '203',\n", " 'LATOMICPOL': '401',\n", " 'RATOMICPOL': '101',\n", " 'LCT2010': '149',\n", " 'LCT2010Suf': None,\n", " 'RCT2010': '185',\n", " 'RCT2010Suf': None,\n", " 'LCB2010': '3001',\n", " 'LCB2010Suf': None,\n", " 'RCB2010': '2000',\n", " 'RCB2010Suf': None,\n", " 'LCT2000': '149',\n", " 'LCT2000Suf': None,\n", " 'RCT2000': 
'137',\n", " 'RCT2000Suf': None,\n", " 'LCB2000': '4000',\n", " 'LCB2000Suf': None,\n", " 'RCB2000': '1000',\n", " 'RCB2000Suf': None,\n", " 'LCT1990': '149',\n", " 'LCT1990Suf': None,\n", " 'RCT1990': '137',\n", " 'RCT1990Suf': None,\n", " 'LAssmDist': '79',\n", " 'LElectDist': '047',\n", " 'RAssmDist': '79',\n", " 'RElectDist': '050',\n", " 'SplitElect': None,\n", " 'LSchlDist': '09',\n", " 'RSchlDist': '09',\n", " 'SplitSchl': None,\n", " 'LSubSect': '1B',\n", " 'RSubSect': '1B',\n", " 'SanDistInd': None,\n", " 'MapFrom': '3D',\n", " 'MapTo': '3D',\n", " 'BoroBndry': None,\n", " 'MH_RI_Flag': None,\n", " 'XFrom': 1010964,\n", " 'YFrom': 241812,\n", " 'XTo': 1011304,\n", " 'YTo': 241537,\n", " 'ArcCenterX': 0,\n", " 'ArcCenterY': 0,\n", " 'CurveFlag': None,\n", " 'Radius': 0,\n", " 'NodeIDFrom': '0047740',\n", " 'NodeIDTo': '0047827',\n", " 'NodeLevelF': 'M',\n", " 'NodeLevelT': 'M',\n", " 'ConParity': None,\n", " 'Twisted': None,\n", " 'RW_TYPE': '1',\n", " 'PhysicalID': 35231.0,\n", " 'GenericID': 30694.0,\n", " 'NYPDID': None,\n", " 'FDNYID': None,\n", " 'LBlockFace': None,\n", " 'RBlockFace': None,\n", " 'LegacyID': '0078126',\n", " 'Status': '2',\n", " 'StreetWidt': None,\n", " 'StreetWi_1': None,\n", " 'BikeLane': None,\n", " 'FCC': None,\n", " 'ROW_Type': None,\n", " 'LLo_Hyphen': '599',\n", " 'LHi_Hyphen': '699',\n", " 'RLo_Hyphen': '596',\n", " 'RHi_Hyphen': '716',\n", " 'FromLeft': 599,\n", " 'ToLeft': 699,\n", " 'FromRight': 596,\n", " 'ToRight': 716,\n", " 'Join_ID': '2251001000000',\n", " 'SHAPE_Leng': 437.2820298731641,\n", " 'ShpName': 'lion_gdf2012',\n", " 'Wkb': b'\\x00\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x02\\xc0Ry\\xd2f\\xa8\\xb8=@DjI\\x08\\x7f\\xf2a\\xc0Ry\\xbeI\\xc4\\xba\\x9b@Dj0EZ\\x1a6',\n", " 'Wkt': 'LINESTRING (-73.90346685864 40.8303537964573,-73.9022392674619 40.8295981111482)',\n", " 'Json': '{ \"type\": \"LineString\", \"coordinates\": [ [ -73.903466858640016, 40.83035379645731 ], [ -73.902239267461866, 40.829598111148172 ] ] }'})" ] 
}, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(G.edges(data=True))[0]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "node_id 174251\n", "geometry 174251\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "node_gdf.count()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "lion_shp.crs = {'init': 'epsg:4326'}\n", "node_gdf.crs = {'init': 'epsg:4326'}" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Network analysis!" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "orig_node = 30502\n", "target_node = 42603" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To calculate the shortest-path network distance, we have to specify the weight. Here the weight is `SHAPE_Leng`, the segment length provided by LION." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If we do not specify the weight, every edge counts as 1, so `shortest_path` returns the route with the fewest edges, which might be the physically longer path." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "route = nx.shortest_path(G=G, source=orig_node, target=target_node, weight = None)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "route_nodes = node_gdf.loc[route]\n", "route_line = LineString(list(route_nodes.geometry.values))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Make a geodataframe to store the data" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "route_geom = gpd.GeoDataFrame(crs=node_gdf.crs)\n", "route_geom['geometry'] = None\n", "route_geom['id'] = None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Add the information into the geodataframe" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "route_geom.loc[0, 'geometry'] = route_line\n", "route_geom.loc[0, 'id'] = str(list(route_nodes['node_id'].values))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specifying the weight as the length of the edge" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "route = nx.shortest_path(G=G, source=orig_node, target=target_node, weight = 'SHAPE_Leng')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Capture the route nodes and turn it into a `LineString`" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "route_nodes = node_gdf.loc[route]\n", "route_line = LineString(list(route_nodes.geometry.values))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Add the information into the geodataframe" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { 
"slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "route_geom.loc[1, 'geometry'] = route_line\n", "route_geom.loc[1, 'id'] = str(list(route_nodes['node_id'].values))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | geometry | \n", "id | \n", "
---|---|---|
0 | \n", "LINESTRING (-73.95950539213287 40.618059451023... | \n", "[30502, 41483, 35956, 35955, 30410, 30350, 304... | \n", "
1 | \n", "LINESTRING (-73.95950539213287 40.618059451023... | \n", "[30502, 41483, 35956, 35962, 35958, 30463, 465... | \n", "