%reload_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell
= "last_expr_or_assign" InteractiveShell.ast_node_interactivity
Power Lines with ibis
python, ibis, folium, geospatial, power lines, duckdb
This is a reproduction of the post on the official ibis website with minor modifications.
I like to add the following to the top of all my notebooks.
Install
The packages can be installed with uv sync
command if you use the lock file in the source directory.
uv sync
Alternatively, you can install the packages with the following commands.
uv add 'ibis-framework[duckdb,geospatial]' folium geopandas
Imports
import ibis
from ibis import _
= True ibis.options.interactive
Download the data
= ibis.duckdb.connect(extensions=["spatial"]) con
<ibis.backends.duckdb.Backend at 0x115da1700>
import os
if not os.path.exists("power-infra-usa.geoparquet"):
# look into type infrastructure
= "s3://overturemaps-us-west-2/release/2024-07-22.0/theme=base/type=infrastructure/*"
url = con.read_parquet(url, table_name="infra-usa")
t
# filter for USA bounding box, subtype="power", and selecting only few columns
= t.filter(
expr > -125.0,
_.bbox.xmin > 24.8,
_.bbox.ymin < -65.8,
_.bbox.xmax < 49.2,
_.bbox.ymax == "power",
_.subtype "names", "geometry", "bbox", "class", "sources", "source_tags"])
).select([
"power-infra-usa.geoparquet")
con.to_parquet(expr,
None
Filter and destructure
With ibis
we can read the power infrastructure data.
= con.read_parquet("power-infra-usa.geoparquet") usa_power_infra
┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ names ┃ geometry ┃ bbox ┃ class ┃ sources ┃ source_tags ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ stru… │ geospatial:geometry │ struct<xmin: float32, xmax: float32, ymin: float32, ymax: float32> │ string │ array<struct<property: string, dataset: string, record_id: string, update_time:… │ map<string, string> │ ├───────┼──────────────────────────────────────────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────┼─────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────────────────────────────────────────┤ │ NULL │ <POINT (-114.291 27.151)> │ {'xmin': -114.29095458984375, 'xmax': -114.29093933105469, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POINT (-114.289 27.149)> │ {'xmin': -114.28852844238281, 'xmax': -114.28851318359375, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POINT (-114.29 27.15)> │ {'xmin': -114.29006958007812, 'xmax': -114.29005432128906, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POINT (-114.29 27.151)> │ {'xmin': -114.2900619506836, 'xmax': -114.29004669189453, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POLYGON ((-114.29 27.152, -114.29 27.151, -114.29 27.15, -114.291 27.151, -...> │ {'xmin': -114.29100036621094, 'xmax': -114.28948974609375, ... +2} │ substation │ [{...}] │ {'location': 'outdoor', 'voltage': '115000'} │ │ NULL │ <POINT (-114.291 27.151)> │ {'xmin': -114.29055786132812, 'xmax': -114.29054260253906, ... +2} │ portal │ [{...}] │ {} │ │ NULL │ <POINT (-114.291 27.152)> │ {'xmin': -114.29069519042969, 'xmax': -114.29067993164062, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POINT (-114.288 27.151)> │ {'xmin': -114.28756713867188, 'xmax': -114.28755187988281, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POINT (-114.286 27.152)> │ {'xmin': -114.28639221191406, 'xmax': -114.286376953125, ... +2} │ power_tower │ [{...}] │ {} │ │ NULL │ <POINT (-114.285 27.152)> │ {'xmin': -114.2851333618164, 'xmax': -114.28511810302734, ... +2} │ power_tower │ [{...}] │ {} │ │ … │ … │ … │ … │ … │ … │ └───────┴──────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────┴─────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────┘
Here’s all the types of infrastructure in the data:
Code
import rich
= rich.table.Table(title="Infrastructure Types")
table
"Type", justify="left", style="cyan", no_wrap=True)
table.add_column(
= (
infrastructure_types
usa_power_infra="class")
.rename(infrastructure_types
.select(_.infrastructure_types)
.distinct()
.execute()
)
for infra_type in infrastructure_types["infrastructure_types"]:
table.add_row(infra_type)
print(table) rich.
Infrastructure Types ┏━━━━━━━━━━━━━━━━━━━━┓ ┃ Type ┃ ┡━━━━━━━━━━━━━━━━━━━━┩ │ cable_distribution │ │ plant │ │ power_tower │ │ switch │ │ power_line │ │ substation │ │ cable │ │ portal │ │ catenary_mast │ │ generator │ │ transformer │ │ terminal │ │ power_pole │ │ minor_line │ │ insulator │ │ connection │ └────────────────────┘
We can get all the power lines using this filter:
(
usa_power_infrafilter(_["class"] == "power_line")
. )
┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ names ┃ geometry ┃ bbox ┃ class ┃ sources ┃ source_tags ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ stru… │ geospatial:geometry │ struct<xmin: float32, xmax: float32, ymin: float32, ymax: float32> │ string │ array<struct<property: string, dataset: string, record_id: string, update_time:… │ map<string, string> │ ├───────┼──────────────────────────────────────────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────┼────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────────────────────────────────────┤ │ NULL │ <LINESTRING (-115.191 28.056, -115.192 28.057, -115.192 28.058, -115.193 28....> │ {'xmin': -115.19832611083984, 'xmax': -115.19091796875, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-113.414 27.641, -113.414 27.642, -113.416 27.642, -113.418 27....> │ {'xmin': -114.29095458984375, 'xmax': -113.41442108154297, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '115000'} │ │ NULL │ <LINESTRING (-113.415 27.641, -113.415 27.642, -113.416 27.642, -113.416 27....> │ {'xmin': -113.41552734375, 'xmax': -112.40901184082031, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '115000'} │ │ NULL │ <LINESTRING (-118.433 33.353, -118.432 33.352, -118.431 33.352, -118.431 33....> │ {'xmin': -118.43326568603516, 'xmax': -118.42095947265625, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-118.489 33.434, -118.489 33.434, -118.489 33.433, -118.486 33....> │ {'xmin': -118.48948669433594, 'xmax': -118.31026458740234, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-118.337 33.341, -118.336 33.341, -118.336 33.341, -118.335 33....> │ {'xmin': -118.33689880371094, 'xmax': -118.33036804199219, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-118.345 33.347, -118.342 33.346, -118.342 33.345, -118.339 33....> │ {'xmin': -118.34489440917969, 'xmax': -118.31033325195312, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-118.335 33.341, -118.335 33.341, -118.335 33.342, -118.335 33....> │ {'xmin': -118.33499145507812, 'xmax': -118.33025360107422, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-118.346 33.347, -118.347 33.348, -118.349 33.349, -118.352 33....> │ {'xmin': -118.38180541992188, 'xmax': -118.34550476074219, ... +2} │ power_line │ [{...}] │ {} │ │ NULL │ <LINESTRING (-118.483 33.435, -118.483 33.436, -118.484 33.436, -118.484 33....> │ {'xmin': -118.49791717529297, 'xmax': -118.48307800292969, ... +2} │ power_line │ [{...}] │ {} │ │ … │ … │ … │ … │ … │ … │ └───────┴──────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────┴────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────┘
This is what ibis does under the hood using sql and duckdb to extract data from the parquet file:
Code
ibis.to_sql(filter(_["class"] == "power_line")
usa_power_infra. )
SELECT
*
REPLACE (ST_ASWKB("geometry") AS "geometry")
FROM (
SELECT
*
FROM "ibis_read_parquet_n6ptzz4gvnb6dlcdlwzwpfb5tq" AS "t0"
WHERE
"t0"."class" = 'power_line'
)
The source_tags
column contains voltage information for some lines:
(
usa_power_infrafilter(_["class"] == "power_line")
.
.select(_.source_tags)
.distinct() )
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ source_tags ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ map<string, string> │ ├──────────────────────────────────────────────────────────────────────────────┤ │ {'frequency': '60;60;0', 'voltage': '230000;230000;0'} │ │ {'frequency': '60;60;0', 'voltage': '230000;220000;0'} │ │ {'voltage': '500000'} │ │ {'frequency': '60', 'voltage': '66000;66000;12000'} │ │ {'frequency': '60', 'voltage': '66000;12000;'} │ │ {'frequency': '60', 'voltage': '66000;12000;240'} │ │ {'ref': 'Moss Landing - Metcalf', 'voltage': '230000'} │ │ {'frequency': '60', 'voltage': '230000', ... +1} │ │ {'frequency': '60', 'voltage': '230000', ... +1} │ │ {'voltage': '230000', 'ref': 'Castro Valley-Newark/San Ramon-Castro Valley'} │ │ … │ └──────────────────────────────────────────────────────────────────────────────┘
You can see that source_tags
is a map (or record or dictionary) that contains voltage information. Let’s extract just the first voltage value we can find into new column called voltage:
= (
power_lines filter(_["class"] == "power_line")
usa_power_infra.filter(_.source_tags["voltage"] != ibis.null())
.
.mutate(=_.source_tags["voltage"]
voltage";")[0]
.split(":")[0]
.split("/")[0]
.split("'")[0]
.split(",")[0]
.split("=")[0]
.split("int64")
.try_cast(
)filter([
.
_.voltage.notnull(),> 0.0,
_.voltage
]) )
┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ ┃ names ┃ geometry ┃ bbox ┃ class ┃ sources ┃ source_tags ┃ voltage ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ │ stru… │ geospatial:geometry │ struct<xmin: float32, xmax: float32, ymin: float32, ymax: float32> │ string │ array<struct<property: string, dataset: string, record_id: string, update_time:… │ map<string, string> │ int64 │ ├───────┼──────────────────────────────────────────────────────────────────────────────────┼────────────────────────────────────────────────────────────────────┼────────────┼──────────────────────────────────────────────────────────────────────────────────┼──────────────────────────────────────────┼─────────┤ │ NULL │ <LINESTRING (-113.414 27.641, -113.414 27.642, -113.416 27.642, -113.418 27....> │ {'xmin': -114.29095458984375, 'xmax': -113.41442108154297, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '115000'} │ 115000 │ │ NULL │ <LINESTRING (-113.415 27.641, -113.415 27.642, -113.416 27.642, -113.416 27....> │ {'xmin': -113.41552734375, 'xmax': -112.40901184082031, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '115000'} │ 115000 │ │ NULL │ <LINESTRING (-115.987 30.715, -115.986 30.716, -115.986 30.715, -115.986 30....> │ {'xmin': -115.98729705810547, 'xmax': -115.90068054199219, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '115000'} │ 115000 │ │ NULL │ <LINESTRING (-112.692 29.91, -112.693 29.91, -112.693 29.91, -112.693 29.909)> │ {'xmin': -112.69328308105469, 'xmax': -112.69247436523438, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ NULL │ <LINESTRING (-112.691 29.909, -112.692 29.909, -112.692 29.909, -112.692 29....> │ {'xmin': -112.69207000732422, 'xmax': -112.69055938720703, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ NULL │ <LINESTRING (-112.692 29.908, -112.691 29.908, -112.691 29.908, -112.691 29....> │ {'xmin': -112.69156646728516, 'xmax': -112.69055938720703, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ NULL │ <LINESTRING (-112.692 29.909, -112.692 29.909, -112.692 29.909, -112.691 29.91)> │ {'xmin': -112.69243621826172, 'xmax': -112.69139099121094, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ NULL │ <LINESTRING (-112.691 29.91, -112.692 29.909, -112.693 29.909, -112.693 29.909)> │ {'xmin': -112.69294738769531, 'xmax': -112.69139099121094, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ NULL │ <LINESTRING (-112.648 29.904, -112.649 29.904, -112.651 29.905, -112.654 29....> │ {'xmin': -112.6930160522461, 'xmax': -112.64823913574219, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ NULL │ <LINESTRING (-112.691 29.909, -112.689 29.911, -112.688 29.914, -112.686 29....> │ {'xmin': -112.6905746459961, 'xmax': -112.65251922607422, ... +2} │ power_line │ [{...}] │ {'frequency': '60', 'voltage': '230000'} │ 230000 │ │ … │ … │ … │ … │ … │ … │ … │ └───────┴──────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────┴────────────┴──────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────┴─────────┘
And then finally we can filter only the power lines with a voltage greater than 230 kV:
= (
power_lines_subset
power_linesfilter(_.voltage > 230e3)
.
.select(_.voltage, _.geometry)
.order_by(_.voltage) )
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ voltage ┃ geometry ┃ ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ int64 │ geospatial:geometry │ ├─────────┼──────────────────────────────────────────────────────────────────────────────────┤ │ 235000 │ <LINESTRING (-104.812 39.762, -104.813 39.762, -104.813 39.762, -104.813 39....> │ │ 235000 │ <LINESTRING (-104.812 39.762, -104.812 39.762, -104.812 39.762, -104.812 39....> │ │ 250000 │ <LINESTRING (-92.297 46.773, -92.298 46.773, -92.302 46.773, -92.307 46.773,...> │ │ 250000 │ <LINESTRING (-92.297 46.774, -92.297 46.774, -92.297 46.775, -92.295 46.775,...> │ │ 250000 │ <LINESTRING (-101.194 47.072, -101.195 47.072)> │ │ 250000 │ <LINESTRING (-101.195 47.072, -101.194 47.072)> │ │ 250000 │ <LINESTRING (-101.194 47.072, -101.193 47.072, -101.19 47.072, -101.187 47.0...> │ │ 250000 │ <LINESTRING (-98.585 46.725, -98.584 46.724, -98.582 46.723, -98.58 46.721, ...> │ │ 260000 │ <LINESTRING (-123.075 49.061, -123.076 49.06, -123.082 49.06, -123.088 49.06...> │ │ 276000 │ <LINESTRING (-72.23 41.712, -72.229 41.713)> │ │ … │ … │ └─────────┴──────────────────────────────────────────────────────────────────────────────────┘
Visualize
We can read the data into a geopandas dataframe:
import geopandas as gpd
= gpd.GeoDataFrame(power_lines_subset.execute()); gdf
And we can visualize it using folium
:
import folium
= folium.Map(location=[41.5435959, -99.8396373], zoom_start=5)
m
="Power Lines").add_to(m)
folium.GeoJson(gdf.to_json(), name
m