cli/
clip_osm.rs

1use std::collections::HashSet;
2use std::io::{BufReader, BufWriter};
3
4use anyhow::Result;
5use fs_err::File;
6use geo::prelude::Contains;
7use geo::{LineString, Point, Polygon};
8use osmio::obj_types::ArcOSMObj;
9use osmio::{Node, OSMObj, OSMObjBase, OSMObjectType, OSMReader, OSMWriter, Relation, Way};
10
11use geom::LonLat;
12
13pub fn run(pbf_path: String, clip_path: String, out_path: String) -> Result<()> {
14    let boundary_pts = LonLat::read_geojson_polygon(&clip_path)?;
15    let raw_pts: Vec<(f64, f64)> = boundary_pts
16        .into_iter()
17        .map(|pt| (pt.x(), pt.y()))
18        .collect();
19    let boundary = Polygon::new(LineString::from(raw_pts), Vec::new());
20    clip(&pbf_path, &boundary, &out_path)
21}
22
23fn clip(pbf_path: &str, boundary: &Polygon, out_path: &str) -> Result<()> {
24    // TODO Maybe just have a single map with RcOSMObj. But then the order we write will be wrong.
25    let mut way_node_ids: HashSet<i64> = HashSet::new();
26    let mut way_ids: HashSet<i64> = HashSet::new();
27    let mut relation_ids: HashSet<i64> = HashSet::new();
28    {
29        // First Pass: accumulate the IDs we want to include in the output
30        let mut reader = osmio::pbf::PBFReader::new(BufReader::new(File::open(pbf_path)?));
31        let mut node_ids_within_boundary: HashSet<i64> = HashSet::new();
32        for obj in reader.objects() {
33            match obj.object_type() {
34                OSMObjectType::Node => {
35                    let node = obj.into_node().unwrap();
36                    if let Some(lat_lon) = node.lat_lon() {
37                        if boundary.contains(&to_pt(lat_lon)) {
38                            node_ids_within_boundary.insert(node.id());
39                        }
40                    }
41                }
42                OSMObjectType::Way => {
43                    // Assume all nodes appear before any way.
44                    let way = obj.into_way().unwrap();
45                    if way
46                        .nodes()
47                        .iter()
48                        .any(|id| node_ids_within_boundary.contains(id))
49                    {
50                        way_ids.insert(way.id());
51
52                        // To properly compute border nodes, we include all nodes of ways that are
53                        // at least partially in the boundary.
54                        way_node_ids.extend(way.nodes().iter().cloned());
55                    }
56                }
57                OSMObjectType::Relation => {
58                    let relation = obj.into_relation().unwrap();
59                    if relation.members().any(|(obj_type, id, _)| {
60                        (obj_type == OSMObjectType::Node && node_ids_within_boundary.contains(&id))
61                            || (obj_type == OSMObjectType::Way && way_ids.contains(&id))
62                            || (obj_type == OSMObjectType::Relation && relation_ids.contains(&id))
63                    }) {
64                        relation_ids.insert(relation.id());
65                    }
66                }
67            }
68        }
69    }
70
71    let mut writer = osmio::xml::XMLWriter::new(BufWriter::new(File::create(out_path)?));
72    // Second Pass: write the feature for each ID accumulated in the first pass
73    let mut reader = osmio::pbf::PBFReader::new(BufReader::new(File::open(pbf_path)?));
74    for obj in reader.objects() {
75        match &obj {
76            ArcOSMObj::Node(node) => {
77                if way_node_ids.contains(&node.id()) {
78                    writer.write_obj(&obj)?;
79                }
80            }
81            ArcOSMObj::Way(way) => {
82                if way_ids.contains(&way.id()) {
83                    writer.write_obj(&obj)?;
84                }
85            }
86            ArcOSMObj::Relation(relation) => {
87                if relation_ids.contains(&relation.id()) {
88                    writer.write_obj(&obj)?;
89                }
90            }
91        }
92    }
93
94    // Don't call write.close() -- it happens when writer gets dropped, and the implementation
95    // isn't idempotent.
96
97    Ok(())
98}
99
100fn to_pt(pair: (osmio::Lat, osmio::Lon)) -> Point {
101    // Note our polygon uses (lon, lat)
102    (pair.1.into(), pair.0.into()).into()
103}