Source code for osm_flex.simplify

"""
This file is part of OSM-flex.
Copyright (C) 2023 OSM-flex contributors listed in AUTHORS.
OSM-flex is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.
OSM-flex is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.
-----
simplification functions
"""

import geopandas as gpd
import numpy as np

def remove_small_polygons(gdf, min_area):
    """Remove (multi-)polygons whose area does not exceed min_area.

    Geometries with an area of exactly zero (points and lines) are left
    untouched. The input geodataframe is not modified; the filtering is done
    on a copy.

    Note: a buffer of 1e-10 is added to invalid geometries before their area
    is evaluated, and the buffered geometry is the one that is returned.

    Parameters
    ----------
    gdf : GeoDataFrame
        geodataframe with polygons, stored in a column named 'geometry'
    min_area : float
        minimal value of area, in the units of the geodataframe's CRS;
        only polygons with an area strictly greater than it are kept

    Returns
    -------
    GeoDataFrame
        copy of the entry geodataframe without (multi-)polygons of area
        smaller than or equal to min_area, with the index reset
    """
    gdf_temp = gdf.copy()

    def make_valid(geometry):
        # valid geometries pass through unchanged; invalid ones get a tiny
        # buffer so that their area can be computed
        if geometry.is_valid:
            return geometry
        return geometry.buffer(1e-10)

    # Apply on the geometry column only: this avoids building a Series for
    # every row and also works on an empty geodataframe, where a row-wise
    # DataFrame.apply does not return a single column.
    gdf_temp['geometry'] = gdf_temp['geometry'].apply(make_valid)

    # compute the area once and reuse it for both conditions
    area = gdf_temp['geometry'].area
    keep = (area > min_area) | (area == 0)
    return gdf_temp[keep].reset_index(drop=True)
def remove_contained_points(gdf_p_mp):
    """
    Drop every point that lies within a (multi-)polygon entry of the same
    GeoDataFrame.

    Only entries with Point geometry are candidates for removal; all other
    entries are kept. Resets the index of the dataframe.

    Parameters
    ----------
    gdf_p_mp : gpd.GeoDataFrame
        GeoDataFrame containing entries with point and (multi-)polygon
        geometry

    Returns
    -------
    gpd.GeoDataFrame
        the entries of gdf_p_mp without the points located within a
        (multi-)polygon, with a fresh index
    """
    gdf_p_mp = gdf_p_mp.reset_index(drop=True)

    geom_types = gdf_p_mp.geometry.geom_type
    points = gdf_p_mp[geom_types == 'Point']
    polygons = gdf_p_mp[geom_types.isin(['Polygon', 'MultiPolygon'])]

    # the join result is indexed by the (left) point of each point/polygon
    # match, so its index lists exactly the points to discard
    matches = gpd.sjoin(points, polygons, predicate='within')
    contained_points = matches.index.unique()

    return gdf_p_mp.drop(index=contained_points).reset_index(drop=True)
def remove_contained_polys(gdf):
    """
    from a GeoDataFrame containing (multi-)polygons (and potentially other
    geometries), remove those polygon entries that are already fully
    contained in another polygon entry.

    Removes smaller polygons within polygons, including polygons that are
    nested several levels deep, and full duplicates (of a group of identical
    polygons only the first entry is kept). Contained points are left
    untouched (see remove_contained_points() for this).
    Resets the index of the dataframe.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        GeoDataFrame containing entries with (multi-)polygon geometry

    Returns
    -------
    gpd.GeoDataFrame
        the entries of gdf without the contained (multi-)polygons, with a
        fresh index
    """
    gdf = gdf.reset_index(drop=True)

    geom_types = gdf.geometry.type
    is_poly = (geom_types == 'MultiPolygon') | (geom_types == 'Polygon')
    if not is_poly.any():
        # nothing can contain or be contained: skip the spatial join
        return gdf

    polys = gdf[is_poly]
    contained = gpd.sjoin(polys, polys, predicate='contains')

    # every (outer, inner) pair states that entry `outer` contains entry
    # `inner`; each polygon is also matched with itself
    pairs = set(zip(contained.index.tolist(),
                    contained['index_right'].tolist()))

    to_drop = set()
    for outer, inner in pairs:
        if outer == inner:
            continue
        if (inner, outer) not in pairs:
            # strictly contained in a different polygon
            to_drop.add(inner)
        elif outer < inner:
            # both contain each other, i.e. they are duplicates:
            # keep only the one that comes first
            to_drop.add(inner)

    return gdf.drop(index=sorted(to_drop)).reset_index(drop=True)
def remove_exact_duplicates(gdf):
    """
    Drop all entries whose geometry is an exact duplicate of the geometry of
    an earlier entry.

    Geometries of any type are compared through their WKB representation;
    the first entry of every group of identical geometries is kept.
    Resets the index of the dataframe.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        GeoDataFrame containing any types of geometry

    Returns
    -------
    gpd.GeoDataFrame
        the entries of gdf with unique geometries, with a fresh index
    """
    deduped = gdf.reset_index(drop=True)
    # the WKB bytes serve as a hashable key for each geometry
    wkb_keys = deduped["geometry"].apply(lambda shape: shape.wkb)
    first_seen = ~wkb_keys.duplicated(keep="first")
    return deduped[first_seen].reset_index(drop=True)