In [1]:
import osmnx as ox
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoSeries
from shapely.ops import cascaded_union
import networkx as nx
import numpy as np
import shapely.geometry
from tqdm import tqdm
import matplotlib.pyplot as plt
In [2]:
import pysal as ps
from scipy.sparse import csr_matrix
/anaconda3/envs/Nessy/lib/python3.6/site-packages/pysal/model/spvcm/abstracts.py:10: UserWarning: The `dill` module is required to use the sqlite backend fully.
  from .sqlite import head_to_sql, start_sql
In [3]:
# 台北的 shapefile
tw = gpd.read_file('./../../../../宸訊/lisa/data/taiwan_county/taiwan_county.shp')
tpe = tw.query("COUNTY == '台北市' or COUNTY == '新北市'")
tpe['geometry'].plot()
Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x116450c50>
In [4]:
tpe = tpe.to_crs(epsg = 4326)
tpe = tpe.reset_index(drop = True)
In [5]:
# 合併台北、新北市
tpe = gpd.GeoSeries(cascaded_union(tpe['geometry']))
tpe.plot()
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x116768898>
In [6]:
# 用 open street map 取得路網
G = ox.graph_from_polygon(tpe[0], network_type='drive')
ox.plot_graph(G)
Out[6]:
(<Figure size 592.935x432 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x125866b38>)
In [7]:
proj_G = ox.project_graph(G, to_crs = {'init':'epsg:3826'}) # 投影
nodes_proj, edges_proj = ox.graph_to_gdfs(proj_G, nodes=True, edges=True) # 節點+邊
edges = edges_proj[['geometry', 'length', 'name', 'osmid', 'u', 'v']]
edges['geometry'].plot()
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x15b7605c0>
In [8]:
edges.head()
Out[8]:
geometry length name osmid u v
0 LINESTRING (302408.0021415878 2766629.76622994... 102.552 永寧街 233926871 2318925826 2422408856
1 LINESTRING (302408.0021415878 2766629.76622994... 168.936 永寧街 233926871 2318925826 2422408853
2 LINESTRING (302408.0021415878 2766629.76622994... 90.877 永安街 354524186 2318925826 2069531106
3 LINESTRING (302408.0021415878 2766629.76622994... 125.222 永安街 354524186 2318925826 2318925899
4 LINESTRING (295091.1520872006 2760299.00084978... 390.707 南天母路 407990816 1749811204 1470433508

bus stop

In [9]:
stop = pd.read_csv('./busstion.csv', encoding = "big5")
In [10]:
stop['Freq'] = stop['Freq'].apply(lambda t: t if t != np.inf else 1/18)
In [11]:
stop = stop[['BusInfo.Id', 'BusInfo.longitude', 'BusInfo.latitude', 'Freq']]
In [12]:
stop = stop.groupby('BusInfo.Id').mean().reset_index()
In [13]:
bus_geom = stop[['BusInfo.longitude', 'BusInfo.latitude']].apply(Point, 1)
In [14]:
stop['geometry'] = bus_geom
In [15]:
stop = gpd.GeoDataFrame(stop, crs = {'init':'epsg:4326'}, geometry = bus_geom)
In [16]:
stop = stop.to_crs(epsg = 3826)
In [65]:
stop.plot(markersize = 0.5, figsize = (8, 8))
Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x16837e2b0>
In [18]:
# 找每個公車站在 Graph 上的最近 node
neighbor = ox.utils.get_nearest_nodes(proj_G, stop['geometry'].x, stop['geometry'].y)
neighbor = list(neighbor)
len(neighbor) 
Out[18]:
2665
In [19]:
stop['osmid'] = neighbor
nodes = nodes_proj[['lat', 'lon', 'geometry', 'osmid']]
In [20]:
len(nodes)
Out[20]:
24452
In [21]:
len(stop)
Out[21]:
2665
In [22]:
stops = pd.merge(stop, nodes, on = 'osmid', how = 'left')
In [23]:
len(stops)
Out[23]:
2665
In [24]:
stops = stops.groupby('osmid').mean()
In [25]:
stops = stops.reset_index()
In [26]:
stops.head()
Out[26]:
osmid BusInfo.Id BusInfo.longitude BusInfo.latitude Freq lat lon
0 32615825 26844.0 121.573548 25.124948 2.788807 25.053656 121.516136
1 181087075 35862.0 121.487643 25.057570 72.343535 25.203951 121.448920
2 181106287 27738.0 121.566049 25.080486 95.024638 25.259072 121.500750
3 181183111 30536.0 121.514615 25.015012 4.038462 25.025675 121.516453
4 181183203 59743.0 121.535330 25.118070 38.676615 25.029802 121.515333
In [27]:
# 再轉一次
stops['lat'] = stops['lat'].astype(float)
stops['lon'] = stops['lon'].astype(float)

geometry = stops[['lon', 'lat']].apply(Point, 1)
stops = gpd.GeoDataFrame(stops, crs = {'init':'epsg:4326'}, geometry = geometry)
stops = stops.to_crs(epsg = 3826)
In [28]:
# 只是確定一下位置有沒有問題
ax = nodes.plot()
stops.plot(ax = ax)
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x124ad7f28>
In [29]:
# 取 geometry 站點位置
stops_loc = stops[['osmid', 'geometry']]
stops_loc = pd.DataFrame(stops_loc)
stops_loc = gpd.GeoDataFrame(stops_loc, geometry = stops_loc['geometry'], crs = {'init':'epsg=3826'})
stops_loc['x'] = stops_loc['geometry'].x
stops_loc['y'] = stops_loc['geometry'].y
stops_loc = stops_loc[['osmid', 'x', 'y']]
len(stops_loc)
Out[29]:
2491
In [30]:
# 計算兩點間最短路徑
def Distance(inp):
    start = inp[0]
    end = inp[1]
    try:
        route = nx.shortest_path(proj_G, start, end, weight='length')

        # route 的距離
        nodes_ = nodes.loc[route, 'geometry'].tolist()
        length = shapely.geometry.LineString(nodes_).length
        return length
    except:
        print(start, end)
In [31]:
bandwidth = 1000
In [32]:
# buffer
print('buffer')
stop_copy = stops.copy()
stop_copy['geometry'] = stops.geometry.buffer(bandwidth)
stop_copy = gpd.sjoin(stop_copy, stops, op='contains')
stop_copy = stop_copy[['osmid_left', 'osmid_right']]

# 最短路徑,建立sparce matrix
stop_copy = stop_copy.query('osmid_left != osmid_right') # 扣掉自己

# 加回位置
stop_copy = pd.merge(stop_copy, stops_loc, left_on = 'osmid_left', right_on = 'osmid')
stop_copy = stop_copy.rename(columns = {'x':'center_x', 'y':'center_y'})

stop_copy = pd.merge(stop_copy, stops_loc, left_on = 'osmid_right', right_on = 'osmid')
stop_copy = stop_copy.rename(columns = {'x':'neighbor_x', 'y':'neighbor_y'})

del stop_copy['osmid_x']
del stop_copy['osmid_y']
buffer
In [42]:
len(stop_copy)
Out[42]:
53314
In [43]:
# 算出兩兩節點之間最短路徑
stop_copy['distance'] = stop_copy[['osmid_left', 'osmid_right']].apply(Distance, 1)
292783106 5179289744
648173196 5179289744
654326129 5179289744
656132459 5179289744
5844312167 5179289744
656132474 5179289744
656132475 5179289744
656132476 5179289744
3208478132 5179289744
661200997 5179289744
661201062 5179289744
631669085 5179289744
648172761 5179289744
648172874 5179289744
648172971 5179289744
631669187 5179289744
631669226 5179289744
631669266 5179289744
631669300 5179289744
648172837 5179289744
1446842433 5179289744
1446842498 5179289744
2940178812 5179289744
619136847 5179289744
631669289 5179289744
3049983742 5179289744
619136812 5179289744
619136845 5179289744
619136899 5179289744
622617930 5179289744
631668926 5179289744
655375236 5179289744
655375244 5179289744
623980147 5179289744
631822018 5179289744
3086736515 5179289744
622617649 5179289744
623980090 5179289744
631821694 5179289744
631821753 5179289744
3179389657 5179289744
619120383 5179289744
619137046 5179289744
2089915604 5179289744
662161331 4447907271
654208337 655391315
662161533 655391323
632565191 655391323
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

Traceback (most recent call last):
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-43-28b5d71c5244>", line 2, in <module>
    stop_copy['distance'] = stop_copy[['osmid_left', 'osmid_right']].apply(Distance, 1)
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/pandas/core/frame.py", line 6487, in apply
    return op.get_result()
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/pandas/core/apply.py", line 151, in get_result
    return self.apply_standard()
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/pandas/core/apply.py", line 251, in apply_standard
    labels=labels)
  File "pandas/_libs/reduction.pyx", line 641, in pandas._libs.reduction.reduce
  File "pandas/_libs/reduction.pyx", line 141, in pandas._libs.reduction.Reducer.get_result
  File "pandas/_libs/properties.pyx", line 69, in pandas._libs.properties.AxisProperty.__set__
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/pandas/core/series.py", line 380, in _set_axis
    self._data.set_axis(axis, labels)
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/pandas/core/internals/managers.py", line 149, in set_axis
    old_len = len(self.axes[axis])
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2033, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/IPython/core/ultratb.py", line 1095, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/IPython/core/ultratb.py", line 313, in wrapped
    return f(*args, **kwargs)
  File "/anaconda3/envs/Nessy/lib/python3.6/site-packages/IPython/core/ultratb.py", line 347, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/anaconda3/envs/Nessy/lib/python3.6/inspect.py", line 1488, in getinnerframes
    frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)
  File "/anaconda3/envs/Nessy/lib/python3.6/inspect.py", line 1446, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/anaconda3/envs/Nessy/lib/python3.6/inspect.py", line 696, in getsourcefile
    if getattr(getmodule(object, filename), '__loader__', None) is not None:
  File "/anaconda3/envs/Nessy/lib/python3.6/inspect.py", line 739, in getmodule
    f = getabsfile(module)
  File "/anaconda3/envs/Nessy/lib/python3.6/inspect.py", line 708, in getabsfile
    _filename = getsourcefile(object) or getfile(object)
  File "/anaconda3/envs/Nessy/lib/python3.6/inspect.py", line 693, in getsourcefile
    if os.path.exists(filename):
  File "/anaconda3/envs/Nessy/lib/python3.6/genericpath.py", line 19, in exists
    os.stat(path)
KeyboardInterrupt
---------------------------------------------------------------------------
In [35]:
# 建立 kernel weight
stop_neighbor = stop_copy.query('distance < {}'.format(bandwidth))
stop_neighbor['weight'] = stop_neighbor['distance'].apply(lambda t: 1 - t/bandwidth) # kernel function = trianguler

# 製作 sparse matrix id 與 站牌 id 對應
stops_num = len(stop_neighbor['osmid_left'].drop_duplicates())
ids = list(range(stops_num))
osmid = stop_neighbor['osmid_left'].unique()
osmid_id = pd.DataFrame({
    'index': ids,
    'osmid': osmid
})

stop_sparse = pd.merge(stop_neighbor, osmid_id, left_on = 'osmid_left', right_on = 'osmid')
stop_sparse = stop_sparse.rename(columns = {'index':'index_left'})
stop_sparse = pd.merge(stop_sparse, osmid_id, left_on = 'osmid_right', right_on = 'osmid')
stop_sparse = stop_sparse.rename(columns = {'index':'index_right'})
del stop_sparse['osmid_x']
del stop_sparse['osmid_y']

# sparse matrix
row = np.array(stop_sparse['index_left'].astype(int))
col = np.array(stop_sparse['index_right'].astype(int))
weight = np.array(stop_sparse['weight'].astype(float))

# 建立權重
sparse = csr_matrix((weight, (row, col)), shape=(max(row)+1, max(row)+1))
wsp = ps.lib.weights.WSP(sparse)
w = ps.lib.weights.WSP2W(wsp)

# 對應的 frequency
stop_freq = pd.merge(osmid_id, stops[['osmid', 'Freq']].drop_duplicates(), on = 'osmid')
value = np.array(stop_freq['Freq'])

# global moran's I
gmI = ps.explore.esda.Moran(value, w, two_tailed=False).I
print(gmI)
/anaconda3/envs/Nessy/lib/python3.6/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
/anaconda3/envs/Nessy/lib/python3.6/site-packages/pysal/lib/weights/weights.py:168: UserWarning: There are 9 disconnected observations 
  Island ids: 279, 648, 914, 1961, 1979, 2111, 2129, 2259, 2260
  " Island ids: %s" % ', '.join(str(island) for island in self.islands))
('WARNING: ', 279, ' is an island (no neighbors)')
('WARNING: ', 648, ' is an island (no neighbors)')
('WARNING: ', 914, ' is an island (no neighbors)')
('WARNING: ', 1961, ' is an island (no neighbors)')
('WARNING: ', 1979, ' is an island (no neighbors)')
('WARNING: ', 2111, ' is an island (no neighbors)')
('WARNING: ', 2129, ' is an island (no neighbors)')
('WARNING: ', 2259, ' is an island (no neighbors)')
('WARNING: ', 2260, ' is an island (no neighbors)')
0.008252388535088606
In [36]:
lm = ps.explore.esda.Moran_Local(value, w)
/anaconda3/envs/Nessy/lib/python3.6/site-packages/pysal/explore/esda/moran.py:895: RuntimeWarning: invalid value encountered in true_divide
  self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
/anaconda3/envs/Nessy/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:901: RuntimeWarning: invalid value encountered in greater
  return (a < x) & (x < b)
/anaconda3/envs/Nessy/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:901: RuntimeWarning: invalid value encountered in less
  return (a < x) & (x < b)
/anaconda3/envs/Nessy/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:1807: RuntimeWarning: invalid value encountered in greater_equal
  cond2 = (x >= _b) & cond0
In [37]:
lm.p_sim
Out[37]:
array([0.221, 0.242, 0.123, ..., 0.405, 0.294, 0.498])
In [38]:
lm.q
Out[38]:
array([3, 3, 3, ..., 3, 3, 3])
In [39]:
stop_freq['p'] = lm.p_sim
In [40]:
stop_freq['q'] = lm.q
In [41]:
stop_plot = pd.merge(stop_freq, stops_loc, on = 'osmid')
In [44]:
HH = stop_plot.query('p < 0.05 and q == 1').copy()
LL = stop_plot.query('p < 0.05 and q == 3').copy()
In [49]:
HH['geometry'] = HH[['x', 'y']].apply(shapely.geometry.Point, axis=1)
LL['geometry'] = LL[['x', 'y']].apply(shapely.geometry.Point, axis=1)
In [75]:
HH = gpd.GeoDataFrame(HH, crs={'init': 'epsg:3826'})
LL = gpd.GeoDataFrame(LL, crs={'init': 'epsg:3826'})
# HH = HH.to_crs(epsg=4326)
In [274]:
HH.to_file('HH.geojson', driver='GeoJSON')
In [275]:
# hh = stop_plot.query('q == 1')
In [47]:
%matplotlib ipympl
In [76]:
ax = edges.plot(alpha = 0.1, figsize = (8, 8), color = 'gray')
HH.plot(ax = ax, c = 'red', markersize = 0.7)
LL.plot(ax = ax, c = "blue", markersize = 0.7)
/anaconda3/envs/Nessy/lib/python3.6/site-packages/geopandas/plotting.py:413: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig, ax = plt.subplots(figsize=figsize)
Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x17ff80198>
In [77]:
ax = edges.plot(alpha = 0.05, figsize = (10, 10), color = 'gray')
plt.scatter(HH['x'], HH['y'], c = 'red', s = 0.8)
plt.scatter(LL['x'], LL['y'], s = 0.8)
/anaconda3/envs/Nessy/lib/python3.6/site-packages/geopandas/plotting.py:413: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig, ax = plt.subplots(figsize=figsize)
Out[77]:
<matplotlib.collections.PathCollection at 0x191f11dd8>