python - 尝试创建 geopandas 数据框时出错:输入必须是有效的几何对象:POLYGON

我正在尝试把一个 pandas 数据框转换为 geopandas 的 GeoDataFrame,并把其中保存 WKT 字符串的 tract_geom 列设置为 geometry 列。

样本df:

state_fips_code tract_ce    tract_geom
6               576700      POLYGON((-118.169788 33.74847, -118.167625 33.747597, -118.165187 33.747139, -118.165741 33.739433, -118.162769 33.739184, -118.162638 33.73965, -118.162508 33.740117, -118.161791 33.742436, -118.161766 33.742512, -118.160742 33.746281, -118.160714 33.746387, -118.159262 33.751741, -118.158781 33.753463, -118.157518 33.757987, -118.156954 33.760101, -118.156884 33.760362, -118.156531 33.761425, -118.15616 33.76175, -118.155959 33.762416, -118.15577 33.763042, -118.155603 33.763596, -118.155444 33.764124, -118.155268 33.764727, -118.154575 33.764584, -118.154574 33.765026, -118.15458 33.766035, -118.154581 33.766266, -118.154584 33.767057, -118.154589 33.767951, -118.15459 33.768073, -118.155696 33.768074, -118.155904 33.768074, -118.156762 33.768076, -118.157844 33.768077, -118.158646 33.768078, -118.158925 33.768079, -118.159534 33.76808, -118.159557 33.76808, -118.160268 33.76808, -118.160584 33.768081, -118.161666 33.768076, -118.161683 33.768076, -118.162772 33.768078, -118.162938 33.768078, -118.163854 33.768079, -118.16421 33.76808, -118.164738 33.768074, -118.165013 33.768097, -118.165136 33.768129, -118.165204 33.768161, -118.165299 33.768232, -118.165379 33.768324, -118.165389 33.768291, -118.165424 33.768173, -118.165457 33.768062, -118.165463 33.768042, -118.165621 33.767529, -118.16568 33.767337, -118.165806 33.766926, -118.16599 33.766328, -118.166149 33.765794, -118.166308 33.76528, -118.166632 33.764199, -118.166191 33.764109, -118.166348 33.763579, -118.16643 33.763468, -118.166608 33.762587, -118.166728 33.761999, -118.16676 33.761851, -118.166769 33.761802, -118.166792 33.761705, -118.16816 33.755431, -118.168649 33.753374, -118.169788 33.74847))


import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely import wkt

# Find and drop rows with bad geometry data
# Walks the WKT strings in df['tract_geom'], collecting the parsed geometries
# in `geom` and a running counter value for every row that fails to parse
# in `dlst`.
i = 0
geom = []  # parsed geometries; not read again anywhere in this snippet
dlst = []  # counter values of the rows whose WKT could not be parsed

for g in df['tract_geom']:
    
    # The counter is incremented before the parse attempt, so the first row
    # is recorded as 1, not 0.
    i = i + 1
    
    try:
        geom.append(wkt.loads(g))
    # NOTE(review): a bare except also catches KeyboardInterrupt/SystemExit.
    except:
        dlst.append(i)


# NOTE(review): `df_geo` is not defined in this snippet (the loop reads `df`).
# drop() removes rows by index label; if the frame has a default 0-based index,
# the 1-based values in `dlst` point one row past each failing row -- confirm.
df_geo.drop(dlst, inplace=True)

# Convert to GeoDataFrame
# The parsed geometries are stored in a NEW column named 'geometry' ...
df['geometry'] = df['tract_geom'].apply(wkt.loads)
# ... but the constructor is pointed at 'tract_geom', which still holds the
# raw WKT strings. The TypeError in the traceback below quotes such a string.
df = gpd.GeoDataFrame(df, geometry='tract_geom')

错误回溯(Traceback):

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/d0/gnksqzwn2fn46fjgrkp6045c0000gn/T/ipykernel_50055/100886836.py in <module>
      
      2 #df['geometry'] = df['tract_geom'].apply(wkt.loads)
----> 3 df = gpd.GeoDataFrame(df, geometry='tract_geom')

/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
    182                 _crs_mismatch_warning()
    183                 # TODO: raise error in 0.9 or 0.10.
--> 184             self.set_geometry(geometry, inplace=True)
    185 
    186         if geometry is None and crs:


TypeError: Input must be valid geometry objects: POLYGON((-118.169788 33.74847, -118.167625 33.747597, -118.165187 33.747139, -118.165741 33.739433, -118.162769 33.739184, -118.162638 33.73965, -118.162508 33.740117, -118.161791 33.742436, -118.161766 33.742512, -118.160742 33.746281, -118.160714 33.746387, -118.159262 33.751741, -118.158781 33.753463, -118.157518 33.757987, -118.156954 33.760101, -118.156884 33.760362, -118.156531 33.761425, -118.15616 33.76175, -118.155959 33.762416, -118.15577 33.763042, -118.155603 33.763596, -118.155444 33.764124, -118.155268 33.764727, -118.154575 33.764584, -118.154574 33.765026, -118.15458 33.766035, -118.154581 33.766266, -118.154584 33.767057, -118.154589 33.767951, -118.15459 33.768073, -118.155696 33.768074, -118.155904 33.768074, -118.156762 33.768076, -118.157844 33.768077, -118.158646 33.768078, -118.158925 33.768079, -118.159534 33.76808, -118.159557 33.76808, -118.160268 33.76808, -118.160584 33.768081, -118.161666 33.768076, -118.161683 33.768076, -118.162772 33.768078, -118.162938 33.768078, -118.163854 33.768079, -118.16421 33.76808, -118.164738 33.768074, -118.165013 33.768097, -118.165136 33.768129, -118.165204 33.768161, -118.165299 33.768232, -118.165379 33.768324, -118.165389 33.768291, -118.165424 33.768173, -118.165457 33.768062, -118.165463 33.768042, -118.165621 33.767529, -118.16568 33.767337, -118.165806 33.766926, -118.16599 33.766328, -118.166149 33.765794, -118.166308 33.76528, -118.166632 33.764199, -118.166191 33.764109, -118.166348 33.763579, -118.16643 33.763468, -118.166608 33.762587, -118.166728 33.761999, -118.16676 33.761851, -118.166769 33.761802, -118.166792 33.761705, -118.16816 33.755431, -118.168649 33.753374, -118.169788 33.74847))

回答1

我把下面的完整示例都写完了,才意识到你其实只是写错了一个列名:解析后的几何对象被写进了新的 geometry 列,而 GeoDataFrame 的 geometry 参数指向的 tract_geom 列里仍然是 WKT 字符串。

df['geometry'] = df['tract_geom'].apply(wkt.loads)

应该改为:

df['tract_geom'] = df['tract_geom'].apply(wkt.loads)

import pandas as pd
import geopandas as gp
import numpy as np
from shapely.wkt import loads

d = {'state_fips_code': {0: 6},
 'tract_ce': {0: 576700},
 'tract_geom': {0: 'POLYGON((-118.169788 33.74847, -118.167625 33.747597, -118.165187 33.747139, -118.165741 33.739433, -118.162769 33.739184, -118.162638 33.73965, -118.162508 33.740117, -118.161791 33.742436, -118.161766 33.742512, -118.160742 33.746281, -118.160714 33.746387, -118.159262 33.751741, -118.158781 33.753463, -118.157518 33.757987, -118.156954 33.760101, -118.156884 33.760362, -118.156531 33.761425, -118.15616 33.76175, -118.155959 33.762416, -118.15577 33.763042, -118.155603 33.763596, -118.155444 33.764124, -118.155268 33.764727, -118.154575 33.764584, -118.154574 33.765026, -118.15458 33.766035, -118.154581 33.766266, -118.154584 33.767057, -118.154589 33.767951, -118.15459 33.768073, -118.155696 33.768074, -118.155904 33.768074, -118.156762 33.768076, -118.157844 33.768077, -118.158646 33.768078, -118.158925 33.768079, -118.159534 33.76808, -118.159557 33.76808, -118.160268 33.76808, -118.160584 33.768081, -118.161666 33.768076, -118.161683 33.768076, -118.162772 33.768078, -118.162938 33.768078, -118.163854 33.768079, -118.16421 33.76808, -118.164738 33.768074, -118.165013 33.768097, -118.165136 33.768129, -118.165204 33.768161, -118.165299 33.768232, -118.165379 33.768324, -118.165389 33.768291, -118.165424 33.768173, -118.165457 33.768062, -118.165463 33.768042, -118.165621 33.767529, -118.16568 33.767337, -118.165806 33.766926, -118.16599 33.766328, -118.166149 33.765794, -118.166308 33.76528, -118.166632 33.764199, -118.166191 33.764109, -118.166348 33.763579, -118.16643 33.763468, -118.166608 33.762587, -118.166728 33.761999, -118.16676 33.761851, -118.166769 33.761802, -118.166792 33.761705, -118.16816 33.755431, -118.168649 33.753374, -118.169788 33.74847))'}}

def load_valid(geo):
    """Parse a WKT value into a shapely geometry, or return NaN on failure.

    Args:
        geo: The value to parse; it is passed unchanged to
            ``shapely.wkt.loads``.

    Returns:
        The geometry returned by ``loads``, or ``np.nan`` when ``loads``
        raises, so that unparseable rows can later be removed with
        ``dropna``.
    """
    try:
        return loads(geo)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate. The clause is not narrowed further
        # because the error type raised for malformed WKT depends on the
        # installed shapely version.
        return np.nan

# Build the sample frame and parse every WKT string; values that cannot be
# parsed are replaced by NaN (see load_valid).
df = pd.DataFrame(d)
df['tract_geom'] = df['tract_geom'].apply(load_valid)

# Drop only the rows whose geometry failed to parse. A bare dropna() would
# also discard rows that merely have a missing value in an unrelated column.
valid_rows = df.dropna(subset=['tract_geom'])
gdf = gp.GeoDataFrame(valid_rows, geometry='tract_geom')

# gdf.length is a geometric attribute; it is shown to demonstrate that the
# geometry column is active.
print(gdf, gdf.length, sep='\n\n')

输出:

state_fips_code  tract_ce  \
0                6    576700   

                                          tract_geom  
0  POLYGON ((-118.16979 33.74847, -118.16763 33.7...  

0    0.07811
dtype: float64