kml/
lib.rs

1// TODO Time to rename this crate
2
3#[macro_use]
4extern crate anyhow;
5#[macro_use]
6extern crate log;
7
8use std::collections::BTreeMap;
9
10use anyhow::Result;
11use serde::{Deserialize, Serialize};
12
13use abstutil::{prettyprint_usize, Timer};
14use geom::{GPSBounds, LonLat, PolyLine, Polygon};
15
16/// Some dataset imported from KML, CSV, or something else. If the dataset is large, converting to
17/// this format and serializing is faster than parsing the original again.
18#[derive(Serialize, Deserialize)]
19pub struct ExtraShapes {
20    pub shapes: Vec<ExtraShape>,
21}
22
23/// A single object in the dataset.
24#[derive(Clone, Debug, Serialize, Deserialize)]
25pub struct ExtraShape {
26    /// The object has a different inferred shape depending on the points:
27    /// - a single point just represents a position
28    /// - a ring of points (with the first and last matching) is interpreted as a polygon
29    /// - multiple points are interpreted as a PolyLine
30    pub points: Vec<LonLat>,
31    /// Arbitrary key/value pairs associated with this object; no known schema.
32    pub attributes: BTreeMap<String, String>,
33}
34
35/// Parses a .kml file and returns ExtraShapes. Objects will be clipped to the given gps_bounds. If
36/// require_all_pts_in_bounds is true, objects that're partly out-of-bounds will be skipped.
37pub fn load(
38    path: String,
39    gps_bounds: &GPSBounds,
40    require_all_pts_in_bounds: bool,
41    timer: &mut Timer,
42) -> Result<ExtraShapes> {
43    timer.start(format!("read {}", path));
44    let bytes = abstio::slurp_file(&path)?;
45    let raw_string = std::str::from_utf8(&bytes)?;
46    let tree = roxmltree::Document::parse(raw_string)?;
47    timer.stop(format!("read {}", path));
48
49    let mut shapes = Vec::new();
50    let mut skipped_count = 0;
51    let mut kv = BTreeMap::new();
52
53    timer.start("scrape objects");
54    recurse(
55        tree.root(),
56        &mut shapes,
57        &mut skipped_count,
58        &mut kv,
59        gps_bounds,
60        require_all_pts_in_bounds,
61    )?;
62    timer.stop("scrape objects");
63
64    info!(
65        "Got {} shapes from {} and skipped {} shapes",
66        prettyprint_usize(shapes.len()),
67        path,
68        prettyprint_usize(skipped_count)
69    );
70
71    Ok(ExtraShapes { shapes })
72}
73
74fn recurse(
75    node: roxmltree::Node,
76    shapes: &mut Vec<ExtraShape>,
77    skipped_count: &mut usize,
78    kv: &mut BTreeMap<String, String>,
79    gps_bounds: &GPSBounds,
80    require_all_pts_in_bounds: bool,
81) -> Result<()> {
82    for child in node.children() {
83        recurse(
84            child,
85            shapes,
86            skipped_count,
87            kv,
88            gps_bounds,
89            require_all_pts_in_bounds,
90        )?;
91    }
92    if node.tag_name().name() == "SimpleData" {
93        let key = node.attribute("name").unwrap().to_string();
94        let value = node
95            .text()
96            .map(|x| x.to_string())
97            .unwrap_or_else(String::new);
98        kv.insert(key, value);
99    } else if node.tag_name().name() == "coordinates" {
100        let mut any_oob = false;
101        let mut any_ok = false;
102        let mut pts: Vec<LonLat> = Vec::new();
103        if let Some(txt) = node.text() {
104            for pair in txt.trim().split(' ') {
105                if let Some(pt) = parse_pt(pair) {
106                    pts.push(pt);
107                    if gps_bounds.contains(pt) {
108                        any_ok = true;
109                    } else {
110                        any_oob = true;
111                    }
112                } else {
113                    bail!("Malformed coordinates: {}", pair);
114                }
115            }
116        }
117        if any_ok && (!any_oob || !require_all_pts_in_bounds) {
118            let attributes = std::mem::take(kv);
119            shapes.push(ExtraShape {
120                points: pts,
121                attributes,
122            });
123        } else {
124            *skipped_count += 1;
125        }
126    }
127    Ok(())
128}
129
130fn parse_pt(input: &str) -> Option<LonLat> {
131    let coords: Vec<&str> = input.split(',').collect();
132    // Normally each coordinate is just (X, Y), but for census tract files, there's a third Z
133    // component that's always 0. Just ignore it.
134    if coords.len() < 2 {
135        return None;
136    }
137    match (coords[0].parse::<f64>(), coords[1].parse::<f64>()) {
138        (Ok(lon), Ok(lat)) => Some(LonLat::new(lon, lat)),
139        _ => None,
140    }
141}
142
143impl ExtraShapes {
144    /// Parses a .csv file and returns ExtraShapes. Each record must EITHER have a column called
145    /// 'Longitude' and 'Latitude', representing a single point, OR a column called 'geometry' with
146    /// a WKT-style linestring. All other columns will just be attributes. Objects that're partly
147    /// out-of-bounds will be excluded.
148    pub fn load_csv(
149        path: String,
150        gps_bounds: &GPSBounds,
151        timer: &mut Timer,
152    ) -> Result<ExtraShapes> {
153        timer.start(format!("read {}", path));
154        let mut shapes = Vec::new();
155        for rec in csv::Reader::from_path(&path)?.deserialize() {
156            let mut rec: BTreeMap<String, String> = rec?;
157            match (
158                rec.remove("Longitude"),
159                rec.remove("Latitude"),
160                rec.remove("geometry"),
161            ) {
162                (Some(lon), Some(lat), _) => {
163                    if let (Ok(lon), Ok(lat)) = (lon.parse::<f64>(), lat.parse::<f64>()) {
164                        let pt = LonLat::new(lon, lat);
165                        if gps_bounds.contains(pt) {
166                            shapes.push(ExtraShape {
167                                points: vec![pt],
168                                attributes: rec,
169                            });
170                        }
171                    }
172                }
173                (None, None, Some(raw)) => {
174                    if let Some(points) = LonLat::parse_wkt_linestring(&raw) {
175                        if gps_bounds.try_convert(&points).is_some() {
176                            shapes.push(ExtraShape {
177                                points,
178                                attributes: rec,
179                            });
180                        }
181                    }
182                }
183                _ => {
184                    timer.stop(format!("read {}", path));
185                    bail!(
186                        "{} doesn't have a column called Longitude, Latitude, or geometry",
187                        path
188                    )
189                }
190            }
191        }
192        timer.stop(format!("read {}", path));
193        Ok(ExtraShapes { shapes })
194    }
195}
196
197impl ExtraShapes {
198    /// Parses a .geojson file and returns ExtraShapes
199    pub fn load_geojson_no_clipping(
200        path: String,
201        gps_bounds: &GPSBounds,
202        require_in_bounds: bool,
203    ) -> Result<ExtraShapes> {
204        let bytes = abstio::slurp_file(path)?;
205        let mut shapes = Vec::new();
206
207        for (polygon, attributes) in
208            Polygon::from_geojson_bytes(&bytes, gps_bounds, require_in_bounds)?
209        {
210            shapes.push(ExtraShape {
211                // Awkward, but we have to convert back
212                points: gps_bounds.convert_back(polygon.get_outer_ring().points()),
213                attributes,
214            });
215        }
216        for (pl, attributes) in PolyLine::from_geojson_bytes(&bytes, gps_bounds, require_in_bounds)?
217        {
218            shapes.push(ExtraShape {
219                // Awkward, but we have to convert back
220                points: gps_bounds.convert_back(pl.points()),
221                attributes,
222            });
223        }
224
225        Ok(ExtraShapes { shapes })
226    }
227}