Shortcuts

Source code for torchgeo.datasets.openbuildings

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Open Buildings datasets."""

import glob
import json
import os
import sys
from collections.abc import Iterable
from typing import Any, Callable, Optional, Union, cast

import fiona
import fiona.transform
import matplotlib.pyplot as plt
import pandas as pd
import rasterio
import shapely
import shapely.wkt as wkt
import torch
from matplotlib.figure import Figure
from rasterio.crs import CRS
from rtree.index import Index, Property

from .geo import VectorDataset
from .utils import BoundingBox, check_integrity


class OpenBuildings(VectorDataset):
    r"""Open Buildings dataset.

    The `Open Buildings
    <https://sites.research.google/open-buildings/#download>`__ dataset
    consists of computer generated building detections across the African continent.

    Dataset features:

    * 516M building detections as polygons with centroid lat/long
    * covering area of 19.4M km\ :sup:`2`\ (64% of the African continent)
    * confidence score and `Plus Code <https://maps.google.com/pluscodes/>`_

    Dataset format:

    * csv files containing building detections compressed as csv.gz
    * meta data geojson file

    The data can be downloaded from `here
    <https://sites.research.google/open-buildings/#download>`__. Additionally, the
    `meta data geometry file
    <https://sites.research.google/open-buildings/tiles.geojson>`_ also needs to be
    placed in `paths` as `tiles.geojson`.

    If you use this dataset in your research, please cite the following technical
    report:

    * https://arxiv.org/abs/2107.12283

    .. versionadded:: 0.3
    """

    # Expected MD5 checksum of every known tile archive, keyed by file name.
    md5s = {
        "025_buildings.csv.gz": "41db2572bfd08628d01475a2ee1a2f17",
        "04f_buildings.csv.gz": "3232c1c6d45c1543260b77e5689fc8b1",
        "05b_buildings.csv.gz": "4fc57c63bbbf9a21a3902da7adc3a670",
        "093_buildings.csv.gz": "00fce146dadf0b30255e750c4c5ac2de",
        "095_buildings.csv.gz": "f5765b0936f7ccbd0b4abed60d994f08",
        "0c3_buildings.csv.gz": "013b130fe872387e0cff842399b423de",
        "0c3_buildings.csv": "a697ad2433e9a9f6001de25b4664651a",
        "0c5_buildings.csv.gz": "16ca283e9344e9da8b47acaf03c1c6e4",
        "0c7_buildings.csv.gz": "b3774930006497a80c8a2fbf33056610",
        "0d1_buildings.csv.gz": "41e652218ca5964d297d9cd1d84b831c",
        "0d7_buildings.csv.gz": "d365fe47d10b0756dd54ceca24598d8e",
        "0d9_buildings.csv.gz": "3ebd47fa4f86857266e9a7346d6aa163",
        "0db_buildings.csv.gz": "368213e9caa7ee229ef9403b0ca8c80d",
        "0dd_buildings.csv.gz": "8f5fcefff262fdfd82800092d2e9d841",
        "0df_buildings.csv.gz": "cbb5f63b10daa25568bdde8d9f66f8a4",
        "0e1_buildings.csv.gz": "a9b9bf1e541b62c8a34d2f6f2ae71e1c",
        "0e3_buildings.csv.gz": "3d9c2ffc11c02aec2bd008699f9c4bd1",
        "0e5_buildings.csv.gz": "1e1b2bf63dfc520e62e4b68db23fe64c",
        "0e7_buildings.csv.gz": "c96797588c90e66268367cb56b4b9af8",
        "0e9_buildings.csv.gz": "c53bb7bbc8140034d1be2c49ff49af68",
        "0eb_buildings.csv.gz": "407c771f614a15d69d78f1e25decf694",
        "0ed_buildings.csv.gz": "bddd10992d291677019d7106ce1f4fac",
        "0ef_buildings.csv.gz": "d1b91936e7ac06c661878ef9eb5dba7b",
        "0f1_buildings.csv.gz": "9d86eb10d2d8766e1385b6c52c11d5e2",
        "0f9_buildings.csv.gz": "1c6775131214b26f4a27b4c42d6e9fca",
        "0fb_buildings.csv.gz": "d39528cb4e0cbff589ca89dc86d9b5db",
        "0fd_buildings.csv.gz": "304fe4a60e950c900697d975098f7536",
        "0ff_buildings.csv.gz": "266ca7ed1ad0251b3999b0e2e9b54648",
        "103_buildings.csv.gz": "8d3cafab5f1e02b2a0a6180eb34d1cac",
        "105_buildings.csv.gz": "dd61cc74239aa9a1b30f10859122807b",
        "107_buildings.csv.gz": "823c05984f859a1bf17af8ce78bf2892",
        "109_buildings.csv.gz": "cfdee0e807168cd1c183d9c01535369b",
        "10b_buildings.csv.gz": "d8ecaf406abd864b641ba34985f3042e",
        "10d_buildings.csv.gz": "af584a542a17942ff7e94653322dba87",
        "10f_buildings.csv.gz": "3d5369e15c4d1f59fb38cf61f4e6290b",
        "111_buildings.csv.gz": "47504e43d1b67101bed5d924225328dc",
        "113_buildings.csv.gz": "3f991c831569f91f34eaa8fc3882b2fd",
        "117_buildings.csv.gz": "a4145fa6e458480e30c807f80ae5cd65",
        "119_buildings.csv.gz": "5661b7ac23f266542c7e0d962a8cae58",
        "11b_buildings.csv.gz": "41b6d036610d0bddac069ec72e68710e",
        "11d_buildings.csv.gz": "1ef75e9d176dd8d6bfa6012d36b1d25c",
        "11f_buildings.csv.gz": "f004873d1ef3933c1716ab6409565b7d",
        "121_buildings.csv.gz": "0c7e7a9043ed069fbdefdcfcfc437482",
        "123_buildings.csv.gz": "c46bd53b67025c3de11657220cce0aec",
        "125_buildings.csv.gz": "33253ae1a82656f4eedca9bd86f981a3",
        "127_buildings.csv.gz": "2f827f8fc93485572178e9ad0c65e22d",
        "129_buildings.csv.gz": "74f98346990a1d1e41241ce8f4bb201a",
        "12f_buildings.csv.gz": "b1b0777296df2bfef512df0945ca3e14",
        "131_buildings.csv.gz": "8362825b10c9396ecbb85c49cd210bc6",
        "137_buildings.csv.gz": "96da7389df820405b0010db4a6c98c61",
        "139_buildings.csv.gz": "c41e26fc6f3565c3d7c66ab977dc8159",
        "13b_buildings.csv.gz": "981d4ccb0f41a103bdad8ef949eb4ffe",
        "13d_buildings.csv.gz": "d15585d06ee74b0095842dd887197035",
        "141_buildings.csv.gz": "ae0bf17778d45119c74e50e06a04020d",
        "143_buildings.csv.gz": "9699809e57eb097dfaf9d484f1d9c5fa",
        "145_buildings.csv.gz": "81e74e0165ea358278ce18507dddfdb0",
        "147_buildings.csv.gz": "39edad15fa16c432f5d460f0a8166032",
        "149_buildings.csv.gz": "94bf8f8fa221744fb1d57c7d4065e69e",
        "14f_buildings.csv.gz": "ca8410be89b5cf868c2a67861712e4ea",
        "15b_buildings.csv.gz": "8c0071c0ae20a60e8dd4d7aa6aac5a99",
        "15d_buildings.csv.gz": "35f044a323556adda5f31e8fc9307c85",
        "161_buildings.csv.gz": "ba08b70a26f07b5e2cd4eafd9d6f826b",
        "163_buildings.csv.gz": "2bec83a2504b531cd1cb0311fcb6c952",
        "165_buildings.csv.gz": "48f934733dd3054164f9b09abee63312",
        "167_buildings.csv.gz": "bba8657024d80d44e475759b65adc969",
        "169_buildings.csv.gz": "13e142e48597ee7a8b0b812e226dfa72",
        "16b_buildings.csv.gz": "9c62351d6cc8eaf761ab89d4586d26d6",
        "16d_buildings.csv.gz": "a33c23da3f603c8c3eacc5e6a47aaf66",
        "16f_buildings.csv.gz": "4850dd7c8f0fb628ba5864ea9f47647b",
        "171_buildings.csv.gz": "4217f1b025db869c8bed1014704c2a79",
        "173_buildings.csv.gz": "5a5f3f07e261a9dc58c6180b69130e4a",
        "175_buildings.csv.gz": "5bbf7a7c8f57d28e024ddf8f4039b575",
        "177_buildings.csv.gz": "76cd4b17d68d62e1f088f229b65f8acf",
        "179_buildings.csv.gz": "a5a1c6609483336ddff91b2385e70eb9",
        "17b_buildings.csv.gz": "a47c1145a3b0bcdaba18c153b7b92b87",
        "17d_buildings.csv.gz": "3226d0abf396f44c1a436be83538dfd8",
        "17f_buildings.csv.gz": "3e18d4fc5837ee89274d30f2126b92b2",
        "181_buildings.csv.gz": "c87639d7f6d6a85a3fa6b06910b0e145",
        "183_buildings.csv.gz": "e94438ebf19b3b25035954d23a0e90cf",
        "185_buildings.csv.gz": "8de8d1d50c16c575f85b96dee474cb56",
        "189_buildings.csv.gz": "da94cd495a99496fd687bbb4a1715c90",
        "18b_buildings.csv.gz": "9ab353335fe6ff694e834889be2b305d",
        "18d_buildings.csv.gz": "e37e0f868ce96f7d14f7bf1a301da1d3",
        "18f_buildings.csv.gz": "e9000b9ef9bb0f838088e96becfc95a1",
        "191_buildings.csv.gz": "c00bb4d6b2b12615d576c06fe545cbfa",
        "193_buildings.csv.gz": "d48d4c03ef053f6987b3e6e9e78a8b03",
        "195_buildings.csv.gz": "d93ab833e74480f07a5ccf227067db5a",
        "197_buildings.csv.gz": "8667e040f9863e43924aafe6071fabc7",
        "199_buildings.csv.gz": "04ba65a4caf16cc1e0d5c4e1322c5885",
        "19b_buildings.csv.gz": "e49412e3e1bccceb0bdb4df5201288f4",
        "19d_buildings.csv.gz": "92b5fb4e96529d90e99c788e3e8696d4",
        "19f_buildings.csv.gz": "c023f6c37d0026b56f530b841517a6cd",
        "1a1_buildings.csv.gz": "471483b50c722af104af8a582e780c04",
        "1a3_buildings.csv.gz": "0a453053f1ff53f9e165e16c7f97354a",
        "1a5_buildings.csv.gz": "1f6a823e223d5f29c66aa728933de684",
        "1a7_buildings.csv.gz": "6130b724501fa16e6d84e484c4091f1f",
        "1a9_buildings.csv.gz": "73022e8e7b994e76a58cc763a057d542",
        "1b9_buildings.csv.gz": "48dea4af9d12b755e75b76c68c47de6b",
        "1bb_buildings.csv.gz": "dfb9ee4d3843d81722b70f7582c775a4",
        "1bd_buildings.csv.gz": "fdea2898fc50ae25b6196048373d8244",
        "1bf_buildings.csv.gz": "96ef27d6128d0bcdfa896fed6f27cdd0",
        "1c1_buildings.csv.gz": "32e3667d939e7f95316eb75a6ffdb603",
        "1c3_buildings.csv.gz": "ed94b543da1bbe3101ed66f7d7727d24",
        "1c5_buildings.csv.gz": "ce527ab33e564f0cc1b63ae467932a18",
        "1c7_buildings.csv.gz": "d5fb474466d6a11d3b08e3a011984ada",
        "1dd_buildings.csv.gz": "9e7e50e3f95b3f2ceff6351b75ca1e75",
        "1e5_buildings.csv.gz": "f95ea85fce47ce7edf5729086d43f922",
        "1e7_buildings.csv.gz": "2bca5682c48134e69b738d70dfe7d516",
        "1e9_buildings.csv.gz": "f049ad06dbbb200f524b4f50d1df8c2e",
        "1eb_buildings.csv.gz": "6822d7f202b453ec3cc03fb8f04691ad",
        "1ed_buildings.csv.gz": "9dfc560e2c3d135ebdcd46fa09c47169",
        "1ef_buildings.csv.gz": "506e7772c35b09cfd3b6f8691dc2947d",
        "1f1_buildings.csv.gz": "b74f2b585cfad3b881fe4f124080440a",
        "1f3_buildings.csv.gz": "12896642315320e11ed9ed2d3f0e5995",
        "1f5_buildings.csv.gz": "334aea21e532e178bf5c54d028158906",
        "1f7_buildings.csv.gz": "0e8c3d2e005eb04c6852a8aa993f5a76",
        "217_buildings.csv.gz": "296e9ba121fea752b865a48e5c0fe8a5",
        "219_buildings.csv.gz": "1d19b6626d738f7706f75c2935aaaff4",
        "21d_buildings.csv.gz": "28bfca1f8668f59db021d3a195994768",
        "21f_buildings.csv.gz": "06325c8b0a8f6ed598b7dc6f0bb5adf2",
        "221_buildings.csv.gz": "a354ffc1f7226d525c7cf53848975da1",
        "223_buildings.csv.gz": "3bda1339d561b3bc749220877f1384d9",
        "225_buildings.csv.gz": "8eb02ad77919d9e551138a14d3ad1bbc",
        "227_buildings.csv.gz": "c07aceb7c81f83a653810befa0695b61",
        "22f_buildings.csv.gz": "97d63e30e008ec4424f6b0641b75377c",
        "231_buildings.csv.gz": "f4bc384ed74552ddcfe2e69107b91345",
        "233_buildings.csv.gz": "081756e7bdcfdc2aee9114c4cfe62bd8",
        "23b_buildings.csv.gz": "75776d3dcbc90cf3a596664747880134",
        "23d_buildings.csv.gz": "e5d0b9b7b14601f58cfdb9ea170e9520",
        "23f_buildings.csv.gz": "77f38466419b4d391be8e4f05207fdf5",
        "3d1_buildings.csv.gz": "6659c97bd765250b0dee4b1b7ff583a9",
        "3d5_buildings.csv.gz": "c27d8f6b2808549606f00bc04d8b42bc",
        "3d7_buildings.csv.gz": "abdef2e68cc31c67dbb6e60c4c40483e",
        "3d9_buildings.csv.gz": "4c06ae37d8e76626345a52a32f989de9",
        "3db_buildings.csv.gz": "e83ca0115eaf4ec0a72aaf932b00442a",
        "b5b_buildings.csv.gz": "5e5f59cb17b81137d89c4bab8107e837",
    }

    filename_glob = "*_buildings.csv"
    zipfile_glob = "*_buildings.csv.gz"

    meta_data_url = "https://sites.research.google/open-buildings/tiles.geojson"
    meta_data_filename = "tiles.geojson"

    def __init__(
        self,
        paths: Union[str, Iterable[str]] = "data",
        crs: Optional[CRS] = None,
        res: float = 0.0001,
        transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            paths: one or more root directories to search or files to load
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS
            transforms: a function/transform that takes input sample and its target as
                entry and returns a transformed version
            checksum: if True, check the MD5 of the downloaded files (may be slow)

        Raises:
            FileNotFoundError: if no files are found in ``paths``

        .. versionchanged:: 0.5
           *root* was renamed to *paths*.
        """
        self.paths = paths
        self.res = res
        self.checksum = checksum
        self.transforms = transforms

        self._verify()

        # Create an R-tree to index the dataset using the polygon centroid as bounds
        self.index = Index(interleaved=False, properties=Property(dimension=3))

        # Only a single directory is handled below (paths are joined onto it).
        assert isinstance(self.paths, str)

        with open(os.path.join(self.paths, self.meta_data_filename)) as f:
            data = json.load(f)

        # Names of the tile archives actually present on disk; a set keeps the
        # per-feature membership test O(1).
        available_archives = {
            os.path.basename(path)
            for path in glob.glob(os.path.join(self.paths, self.zipfile_glob))
        }

        # Keep only the metadata features whose tile archive has been downloaded,
        # paired with the archive file name taken from the end of the tile URL.
        matched_features = []
        for feature in data["features"]:
            filename = feature["properties"]["tile_url"].split("/")[-1]
            if filename in available_archives:
                matched_features.append((filename, feature))

        source_crs = CRS.from_dict({"init": "epsg:4326"})
        if crs is None:
            # No target CRS requested: fall back to the CRS of the source data.
            crs = CRS.from_dict(source_crs)

        # The index is 3-dimensional; tiles carry no timestamp, so each entry
        # spans the whole [0, sys.maxsize] range on the third axis.
        mint = 0
        maxt = sys.maxsize
        for i, (filename, feature) in enumerate(matched_features):
            # First ring of the tile geometry -> axis-aligned bounds.
            ring = feature["geometry"]["coordinates"][0]
            xs = [point[0] for point in ring]
            ys = [point[1] for point in ring]
            minx, miny, maxx, maxy = min(xs), min(ys), max(xs), max(ys)

            (minx, maxx), (miny, maxy) = fiona.transform.transform(
                source_crs.to_dict(), crs.to_dict(), [minx, maxx], [miny, maxy]
            )
            coords = (minx, maxx, miny, maxy, mint, maxt)

            filepath = os.path.join(self.paths, filename)
            self.index.insert(i, coords, filepath)

        if not matched_features:
            raise FileNotFoundError(
                f"No {self.__class__.__name__} data was found in '{self.paths}'"
            )

        self._crs = crs
        self._source_crs = source_crs

    def __getitem__(self, query: BoundingBox) -> dict[str, Any]:
        """Retrieve image/mask and metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of image/mask and metadata for the given query. If no
            matching shapes are found within the query, an empty raster is
            returned

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        filepaths = cast(list[str], [hit.object for hit in hits])

        if not filepaths:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        shapes = self._filter_geometries(query, filepaths)

        # Rasterize geometries
        width = (query.maxx - query.minx) / self.res
        height = (query.maxy - query.miny) / self.res
        transform = rasterio.transform.from_bounds(
            query.minx, query.miny, query.maxx, query.maxy, width, height
        )
        out_shape = (round(height), round(width))
        if shapes:
            masks = rasterio.features.rasterize(
                shapes, out_shape=out_shape, transform=transform
            )
            masks = torch.tensor(masks).unsqueeze(0)
        else:
            # Use uint8 here as well so the mask dtype does not depend on whether
            # the query happens to contain any polygons (rasterize is called
            # without an explicit dtype and burns 0/1 values).
            masks = torch.zeros(size=(1, *out_shape), dtype=torch.uint8)

        sample = {"mask": masks, "crs": self.crs, "bbox": query}

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def _filter_geometries(
        self, query: BoundingBox, filepaths: list[str]
    ) -> list[dict[str, Any]]:
        """Collect the polygons from the hit csv files that fall inside the query.

        Rows are selected by comparing their ``longitude``/``latitude`` columns
        against the query bounds expressed in the source CRS.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index
            filepaths: filepaths to files that were hits from the R-tree index

        Returns:
            List with all polygons from all hit filepaths
        """
        # We need to know the bounding box of the query in the source CRS
        (minx, maxx), (miny, maxy) = fiona.transform.transform(
            self._crs.to_dict(),
            self._source_crs.to_dict(),
            [query.minx, query.maxx],
            [query.miny, query.maxy],
        )
        df_query = (
            f"longitude >= {minx} & longitude <= {maxx} & "
            f"latitude >= {miny} & latitude <= {maxy}"
        )
        shapes = []
        for f in filepaths:
            # Read in chunks so a whole tile never has to be held in memory.
            csv_chunks = pd.read_csv(f, chunksize=200000, compression="gzip")
            for chunk in csv_chunks:
                df = chunk.query(df_query)
                # Warp geometries to requested CRS
                polygon_series = df["geometry"].map(self._wkt_fiona_geom_transform)
                shapes.extend(polygon_series.values.tolist())

        return shapes

    def _wkt_fiona_geom_transform(self, x: str) -> dict[str, Any]:
        """Function to transform a geometry string into new crs.

        Args:
            x: Polygon string

        Returns:
            transformed geometry in geojson format
        """
        # The JSON round trip turns the coordinate tuples of the shapely mapping
        # into plain nested lists.
        geom_mapping = json.loads(json.dumps(shapely.geometry.mapping(wkt.loads(x))))

        geom: Any
        if hasattr(fiona, "model"):
            # This fiona build exposes ``fiona.model``: hand the geometry over as
            # its Geometry class rather than as a bare mapping.
            from fiona.model import Geometry

            geom = Geometry(**geom_mapping)
        else:
            geom = geom_mapping

        transformed: dict[str, Any] = fiona.transform.transform_geom(
            self._source_crs.to_dict(), self._crs.to_dict(), geom
        )
        return transformed

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Raises:
            RuntimeError: if dataset is missing or checksum fails
            FileNotFoundError: if metadata file is not found in ``paths``
        """
        # Check if the zip files have already been downloaded and checksum
        assert isinstance(self.paths, str)
        pathname = os.path.join(self.paths, self.zipfile_glob)
        found_archive = False
        for zipfile in glob.iglob(pathname):
            filename = os.path.basename(zipfile)
            if self.checksum and not check_integrity(zipfile, self.md5s[filename]):
                raise RuntimeError(f"Dataset found, but corrupted: {filename}.")
            found_archive = True

        # The metadata file is required in every case (the constructor reads it
        # right after this check), so verify it even when archives were found.
        if not os.path.exists(os.path.join(self.paths, self.meta_data_filename)):
            raise FileNotFoundError(
                f"Meta data file {self.meta_data_filename} "
                f"not found in `paths={self.paths!r}`."
            )

        if found_archive:
            return

        raise RuntimeError(
            f"Dataset not found in `paths={self.paths!r}` "
            "either specify a different `paths` or make sure you "
            "have manually downloaded the dataset as suggested in the documentation."
        )

    def plot(
        self,
        sample: dict[str, Any],
        show_titles: bool = True,
        suptitle: Optional[str] = None,
    ) -> Figure:
        """Plot a sample from the dataset.

        Args:
            sample: a sample returned by :meth:`__getitem__`
            show_titles: flag indicating whether to show titles above each panel
            suptitle: optional string to use as a suptitle

        Returns:
            a matplotlib Figure with the rendered sample
        """
        # Move the leading axis to the end for imshow.
        mask = sample["mask"].permute(1, 2, 0)

        showing_predictions = "prediction" in sample
        if showing_predictions:
            pred = sample["prediction"].permute(1, 2, 0)
            ncols = 2
        else:
            ncols = 1

        fig, axs = plt.subplots(nrows=1, ncols=ncols, figsize=(ncols * 4, 4))

        if showing_predictions:
            axs[0].imshow(mask)
            axs[0].axis("off")
            axs[1].imshow(pred)
            axs[1].axis("off")
            if show_titles:
                axs[0].set_title("Mask")
                axs[1].set_title("Prediction")
        else:
            # With a single column, subplots returns one Axes, not an array.
            axs.imshow(mask)
            axs.axis("off")
            if show_titles:
                axs.set_title("Mask")

        if suptitle is not None:
            # Title the figure created above rather than pyplot's current figure.
            fig.suptitle(suptitle)

        return fig

© Copyright 2021, Microsoft Corporation. Revision b9653beb.

Built with Sphinx using a theme provided by Read the Docs.
Read the Docs v: stable
Versions
latest
stable
v0.5.2
v0.5.1
v0.5.0
v0.4.1
v0.4.0
v0.3.1
v0.3.0
v0.2.1
v0.2.0
v0.1.1
v0.1.0
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources