Shortcuts

Source code for torchgeo.datasets.landcoverai

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""LandCover.ai dataset."""

import hashlib
import os
from functools import lru_cache
from typing import Any, Callable, Dict, Optional

import numpy as np
import pytorch_lightning as pl
import torch
from PIL import Image
from torch import Tensor
from torch.utils.data import DataLoader

from .geo import VisionDataset
from .utils import check_integrity, download_and_extract_archive, working_dir

# Override the module name that ``DataLoader`` reports so it reads as
# ``torch.utils.data``. Background is in the upstream issue and PR below
# (presumably needed so documentation cross-references resolve -- confirm
# against the linked discussion).
# https://github.com/pytorch/pytorch/issues/60979
# https://github.com/pytorch/pytorch/pull/61045
DataLoader.__module__ = "torch.utils.data"


class LandCoverAI(VisionDataset):
    r"""LandCover.ai dataset.

    The `LandCover.ai <https://landcover.ai/>`_ (Land Cover from Aerial Imagery)
    dataset is a dataset for automatic mapping of buildings, woodlands, water and
    roads from aerial images. This implementation is specifically for Version 1 of
    Landcover.ai.

    Dataset features:

    * land cover from Poland, Central Europe
    * three spectral bands - RGB
    * 33 orthophotos with 25 cm per pixel resolution (~9000x9500 px)
    * 8 orthophotos with 50 cm per pixel resolution (~4200x4700 px)
    * total area of 216.27 km\ :sup:`2`

    Dataset format:

    * rasters are three-channel GeoTiffs with EPSG:2180 spatial reference system
    * masks are single-channel GeoTiffs with EPSG:2180 spatial reference system

    Dataset classes:

    1. building (1.85 km\ :sup:`2`\ )
    2. woodland (72.02 km\ :sup:`2`\ )
    3. water (13.15 km\ :sup:`2`\ )
    4. road (3.5 km\ :sup:`2`\ )

    If you use this dataset in your research, please cite the following paper:

    * https://arxiv.org/abs/2005.02264v3

    .. note::

       This dataset requires the following additional library to be installed:

       * `opencv-python <https://pypi.org/project/opencv-python/>`_ to generate
         the train/val/test split
    """

    url = "https://landcover.ai/download/landcover.ai.v1.zip"
    filename = "landcover.ai.v1.zip"
    # MD5 of the downloaded archive; only checked when ``checksum=True``.
    md5 = "3268c89070e8734b4e91d531c0617e03"
    # SHA-256 of the ``split.py`` script inside the archive; always verified
    # before that script is executed in ``_download``.
    sha256 = "15ee4ca9e3fd187957addfa8f0d74ac31bc928a966f76926e11b3c33ea76daa1"

    def __init__(
        self,
        root: str = "data",
        split: str = "train",
        transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new LandCover.ai dataset instance.

        Args:
            root: root directory where dataset can be found
            split: one of "train", "val", or "test"
            transforms: a function/transform that takes input sample and its target as
                entry and returns a transformed version
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 of the downloaded files (may be slow)

        Raises:
            AssertionError: if ``split`` argument is invalid
            RuntimeError: if ``download=False`` and data is not found, or checksums
                don't match
        """
        # Raised explicitly (instead of via ``assert``) so the validation is
        # not stripped when Python runs with ``-O``.
        if split not in ["train", "val", "test"]:
            raise AssertionError(
                f"split must be one of 'train', 'val', or 'test', got {split!r}"
            )

        self.root = root
        self.split = split
        self.transforms = transforms
        self.checksum = checksum

        if download:
            self._download()

        if not self._check_integrity():
            raise RuntimeError(
                "Dataset not found or corrupted. "
                + "You can use download=True to download it"
            )

        # Each line of ``<split>.txt`` is one sample ID; trailing newlines are
        # kept here and stripped on access in ``__getitem__``.
        with open(os.path.join(self.root, split + ".txt")) as f:
            self.ids = f.readlines()

    def __getitem__(self, index: int) -> Dict[str, Tensor]:
        """Return the sample at the given index.

        Args:
            index: index to return

        Returns:
            dictionary with the "image" and "mask" tensors at that index, after
            ``transforms`` has been applied (if one was given)
        """
        id_ = self.ids[index].rstrip()
        sample = {"image": self._load_image(id_), "mask": self._load_target(id_)}

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def __len__(self) -> int:
        """Return the number of data points in the dataset.

        Returns:
            length of the dataset
        """
        return len(self.ids)

    # NOTE: ``lru_cache`` on a method includes ``self`` in the cache key, so the
    # cache keeps a reference to every dataset instance that has loaded a sample,
    # and the same tensor object is handed back on repeated access. Transforms
    # should therefore not modify the returned tensors in place.
    @lru_cache()
    def _load_image(self, id_: str) -> Tensor:
        """Load a single image.

        Args:
            id_: unique ID of the image

        Returns:
            the image
        """
        filename = os.path.join(self.root, "output", id_ + ".jpg")
        with Image.open(filename) as img:
            array = np.array(img)
            tensor: Tensor = torch.from_numpy(array)  # type: ignore[attr-defined]
            # Convert from HxWxC to CxHxW
            tensor = tensor.permute((2, 0, 1))
            return tensor

    # NOTE: see the caching remark on ``_load_image``; the same applies here.
    @lru_cache()
    def _load_target(self, id_: str) -> Tensor:
        """Load the target mask for a single image.

        Args:
            id_: unique ID of the image

        Returns:
            the target mask
        """
        filename = os.path.join(self.root, "output", id_ + "_m.png")
        with Image.open(filename) as img:
            # Force a single-channel ("L" mode) image before converting
            array = np.array(img.convert("L"))
            tensor: Tensor = torch.from_numpy(array)  # type: ignore[attr-defined]
            return tensor

    def _check_integrity(self) -> bool:
        """Check integrity of dataset.

        Returns:
            True if dataset files are found and/or MD5s match, else False
        """
        # The MD5 is only passed on when the user asked for checksum verification
        integrity: bool = check_integrity(
            os.path.join(self.root, self.filename), self.md5 if self.checksum else None
        )
        return integrity

    def _download(self) -> None:
        """Download the dataset and extract it.

        After extraction, the ``split.py`` script from the archive is executed
        inside ``root`` to generate the train/val/test splits.

        Raises:
            AssertionError: if the checksum of split.py does not match
        """
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        download_and_extract_archive(
            self.url,
            self.root,
            filename=self.filename,
            md5=self.md5 if self.checksum else None,
        )

        # Generate train/val/test splits
        # Always check the sha256 of this file before executing
        # to avoid malicious code injection
        with working_dir(self.root):
            with open("split.py") as f:
                split = f.read().encode("utf-8")

            # SECURITY: this guard must never be a bare ``assert``. Asserts are
            # removed under ``python -O``, which would let an unverified script
            # reach ``exec`` below. AssertionError is kept as the exception type
            # because that is what callers have been told to expect.
            if hashlib.sha256(split).hexdigest() != self.sha256:
                raise AssertionError(
                    "SHA-256 of split.py does not match the expected value; "
                    "refusing to execute it"
                )

            # SECURITY: executes code shipped in the downloaded archive. The
            # hash pin above is the only safeguard; do not relax it.
            exec(split)


class LandCoverAIDataModule(pl.LightningDataModule):
    """LightningDataModule implementation for the LandCover.ai dataset.

    Uses the train/val/test splits from the dataset.
    """

    def __init__(
        self, root_dir: str, batch_size: int = 64, num_workers: int = 0, **kwargs: Any
    ) -> None:
        """Initialize a LightningDataModule for LandCover.ai based DataLoaders.

        Args:
            root_dir: The ``root`` argument to pass to the Landcover.AI Dataset classes
            batch_size: The batch size to use in all created DataLoaders
            num_workers: The number of workers to use in all created DataLoaders
            **kwargs: Additional keyword arguments; accepted but currently unused
        """
        super().__init__()  # type: ignore[no-untyped-call]
        self.root_dir = root_dir
        self.batch_size = batch_size
        self.num_workers = num_workers

    def preprocess(self, sample: Dict[str, Any]) -> Dict[str, Any]:
        """Transform a single sample from the Dataset.

        The image is divided by 255 and cast to float. The mask is cast to
        float, given a new leading dimension, and shifted up by one.

        Args:
            sample: dictionary containing image and mask

        Returns:
            preprocessed sample
        """
        sample["image"] = sample["image"] / 255.0
        sample["image"] = sample["image"].float()
        sample["mask"] = sample["mask"].float().unsqueeze(0) + 1

        return sample

    def prepare_data(self) -> None:
        """Make sure that the dataset is downloaded.

        The dataset is instantiated with ``download=False``, so a missing archive
        surfaces as an error from ``LandCoverAI`` rather than triggering a download.

        This method is only called once per run.
        """
        _ = LandCoverAI(self.root_dir, download=False, checksum=False)

    def setup(self, stage: Optional[str] = None) -> None:
        """Initialize the main ``Dataset`` objects.

        This method is called once per GPU per run.

        Args:
            stage: stage to set up (not used; all three splits are always created)
        """
        # All splits currently share the same preprocessing
        train_transforms = self.preprocess
        val_test_transforms = self.preprocess

        self.train_dataset = LandCoverAI(
            self.root_dir, split="train", transforms=train_transforms
        )

        self.val_dataset = LandCoverAI(
            self.root_dir, split="val", transforms=val_test_transforms
        )

        self.test_dataset = LandCoverAI(
            self.root_dir, split="test", transforms=val_test_transforms
        )

    def train_dataloader(self) -> DataLoader[Any]:
        """Return a DataLoader for training.

        Returns:
            training data loader
        """
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=True,
        )

    def val_dataloader(self) -> DataLoader[Any]:
        """Return a DataLoader for validation.

        Returns:
            validation data loader
        """
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )

    def test_dataloader(self) -> DataLoader[Any]:
        """Return a DataLoader for testing.

        Returns:
            testing data loader
        """
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )

© Copyright 2021, Microsoft Corporation. Revision c2b56148.

Built with Sphinx using a theme provided by Read the Docs.
Read the Docs v: v0.1.1
Versions
latest
stable
v0.1.1
v0.1.0
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources