importer/
pick_geofabrik.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
use std::convert::TryInto;

use anyhow::{bail, Result};
use geo::{Area, Contains};
use geojson::GeoJson;

use abstutil::Timer;

/// Given the path to a GeoJSON boundary polygon, return the URL of the smallest Geofabrik osm.pbf
/// file that completely covers the boundary, and the path to where the local copy should go.
pub async fn pick_geofabrik(input: String) -> Result<(String, String)> {
    let boundary = load_boundary(input)?;

    let geofabrik_idx = load_remote_geojson(
        abstio::path_shared_input("geofabrik-index.json"),
        "https://download.geofabrik.de/index-v1.json",
    )
    .await?;
    let matches = find_matching_regions(geofabrik_idx, boundary);
    info!("{} regions contain boundary", matches.len(),);
    // Find the smallest matching region. Just round to the nearest square meter for comparison.
    let (_, url) = matches
        .into_iter()
        .min_by_key(|(mp, _)| mp.unsigned_area() as usize)
        .unwrap();

    // Contains some directory structure, like north-america/us/wyoming-latest.osm.pbf or
    // asia/yemen-latest.osm.pbf
    let basename = url
        .strip_prefix("https://download.geofabrik.de/")
        .expect("Geofabrik URLs changed");
    let local = abstio::path_shared_input(format!("geofabrik/{basename}"));

    Ok((url, local))
}

/// Reads the GeoJSON file at `path` and converts its single feature into a polygon.
///
/// The file may be either a bare feature or a feature collection holding exactly one feature.
///
/// # Errors
///
/// Returns an error if the file can't be read as GeoJSON, if it's neither a feature nor a
/// feature collection, if it doesn't hold exactly one feature, if that feature has no geometry,
/// or if the geometry can't be converted to a polygon.
fn load_boundary(path: String) -> Result<geo::Polygon> {
    let gj: GeoJson = abstio::maybe_read_json(path, &mut Timer::throwaway())?;
    let mut features = match gj {
        GeoJson::Feature(feature) => vec![feature],
        GeoJson::FeatureCollection(feature_collection) => feature_collection.features,
        _ => bail!("Unexpected geojson: {:?}", gj),
    };
    if features.len() != 1 {
        bail!("Expected exactly 1 feature");
    }

    // The file is user-supplied, so report malformed input as an error instead of panicking.
    let geometry = match features.pop().and_then(|feature| feature.geometry) {
        Some(geometry) => geometry,
        None => bail!("Boundary feature has no geometry"),
    };
    let converted: Result<geo::Polygon, _> = geometry.value.try_into();
    match converted {
        Ok(poly) => Ok(poly),
        Err(err) => bail!("Boundary geometry isn't a polygon: {:?}", err),
    }
}

/// Returns the GeoJSON stored at `path`, first downloading it from `url` when no file exists at
/// `path` yet. An existing local file is reused as-is and never re-downloaded.
async fn load_remote_geojson(path: String, url: &str) -> Result<GeoJson> {
    let already_cached = abstio::file_exists(&path);
    if !already_cached {
        info!("Downloading {}", url);
        abstio::download_to_file(url, None, &path).await?;
    }

    let mut timer = Timer::throwaway();
    abstio::maybe_read_json(path, &mut timer)
}

/// Returns every region of the index whose geometry contains `boundary`, paired with the
/// region's pbf download URL. Anything other than a feature collection yields no matches.
///
/// # Panics
///
/// Assumes the Geofabrik index format: every feature has a geometry convertible to a
/// multipolygon, and every matching feature has a `urls` property with a string `pbf` entry.
/// Panics if any of that doesn't hold.
fn find_matching_regions(
    geojson: GeoJson,
    boundary: geo::Polygon,
) -> Vec<(geo::MultiPolygon, String)> {
    let fc = match geojson {
        GeoJson::FeatureCollection(fc) => fc,
        _ => return Vec::new(),
    };
    info!("Searching {} regions", fc.features.len());

    fc.features
        .into_iter()
        .filter_map(|mut feature| {
            let geometry = feature.geometry.take().unwrap();
            let region: geo::MultiPolygon = geometry.value.try_into().unwrap();
            if !region.contains(&boundary) {
                return None;
            }

            // Only regions that contain the boundary need their download URL extracted.
            let pbf_url = feature
                .property("urls")
                .unwrap()
                .get("pbf")
                .unwrap()
                .as_str()
                .unwrap()
                .to_string();
            Some((region, pbf_url))
        })
        .collect()
}