
Source code for torchgeo.datasets.skippd

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""SKy Images and Photovoltaic Power Dataset (SKIPP'D)."""

import os
from typing import Any, Callable, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
import torch
from einops import rearrange
from matplotlib.figure import Figure
from torch import Tensor

from .geo import NonGeoDataset
from .utils import download_url, extract_archive


class SKIPPD(NonGeoDataset):
    """SKy Images and Photovoltaic Power Dataset (SKIPP'D).

    The `SKIPP'D dataset <https://purl.stanford.edu/dj417rh1007>`_
    contains ground-based fish-eye photos of the sky for solar
    forecasting tasks.

    Dataset Format:

    * .hdf5 file containing images and labels
    * .npy files with corresponding datetime timestamps

    Dataset Features:

    * fish-eye RGB images (64x64px)
    * power output measurements from 30-kW rooftop PV array
    * 1-min interval across 3 years (2017-2019)

    Nowcast task:

    * 349,372 images under the split key *trainval*
    * 14,003 images under the split key *test*

    Forecast task:

    * 130,412 images under the split key *trainval*
    * 2,462 images under the split key *test*
    * consists of a concatenated RGB time-series of 16 time-steps

    If you use this dataset in your research, please cite:

    * https://doi.org/10.48550/arXiv.2207.00913

    .. versionadded:: 0.5
    """

    url = "https://hf.co/datasets/torchgeo/skippd/resolve/a16c7e200b4618cd93be3143cdb973e3f21498fa/{}"  # noqa: E501
    md5 = {
        "forecast": "f4f3509ddcc83a55c433be9db2e51077",
        "nowcast": "0000761d403e45bb5f86c21d3c69aa80",
    }

    data_file_name = "2017_2019_images_pv_processed_{}.hdf5"
    zipfile_name = "2017_2019_images_pv_processed_{}.zip"

    valid_splits = ["trainval", "test"]
    valid_tasks = ["nowcast", "forecast"]

    dateformat = "%m/%d/%Y, %H:%M:%S"
    def __init__(
        self,
        root: str = "data",
        split: str = "trainval",
        task: str = "nowcast",
        transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found
            split: one of "trainval" or "test"
            task: one of "nowcast" or "forecast"
            transforms: a function/transform that takes an input sample
                and returns a transformed version
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 after downloading files (may be slow)

        Raises:
            AssertionError: if ``task`` or ``split`` is invalid
            ImportError: if h5py is not installed
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        assert (
            split in self.valid_splits
        ), f"Please choose one of these valid data splits {self.valid_splits}."
        self.split = split

        assert (
            task in self.valid_tasks
        ), f"Please choose one of these valid tasks {self.valid_tasks}."
        self.task = task

        self.root = root
        self.transforms = transforms
        self.download = download
        self.checksum = checksum

        try:
            import h5py  # noqa: F401
        except ImportError:
            raise ImportError(
                "h5py is not installed and is required to use this dataset"
            )

        self._verify()
    def __len__(self) -> int:
        """Return the number of data points in the dataset.

        Returns:
            length of the dataset
        """
        import h5py

        with h5py.File(
            os.path.join(self.root, self.data_file_name.format(self.task)), "r"
        ) as f:
            num_datapoints: int = f[self.split]["pv_log"].shape[0]

        return num_datapoints
    def __getitem__(self, index: int) -> dict[str, Union[str, Tensor]]:
        """Return a sample from the dataset at the given index.

        Args:
            index: index to return

        Returns:
            data and label at that index
        """
        sample: dict[str, Union[str, Tensor]] = {"image": self._load_image(index)}
        sample.update(self._load_features(index))

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample
    def _load_image(self, index: int) -> Tensor:
        """Load the input image.

        Args:
            index: index of image to load

        Returns:
            image tensor at index
        """
        import h5py

        with h5py.File(
            os.path.join(self.root, self.data_file_name.format(self.task)), "r"
        ) as f:
            arr = f[self.split]["images_log"][index]

        # forecast samples have dimension [16, 64, 64, 3]; the 16 time steps are
        # stacked along the channel dimension, yielding [48, 64, 64]
        # https://github.com/yuhao-nie/Stanford-solar-forecasting-dataset/blob/main/models/SUNSET_forecast.ipynb
        if self.task == "forecast":
            arr = rearrange(arr, "t h w c -> (t c) h w")
        else:
            arr = rearrange(arr, "h w c -> c h w")

        tensor = torch.from_numpy(arr).to(torch.float32)
        return tensor

    def _load_features(self, index: int) -> dict[str, Union[str, Tensor]]:
        """Load label.

        Args:
            index: index of label to load

        Returns:
            label tensor at index
        """
        import h5py

        with h5py.File(
            os.path.join(self.root, self.data_file_name.format(self.task)), "r"
        ) as f:
            label = f[self.split]["pv_log"][index]

        path = os.path.join(self.root, f"times_{self.split}_{self.task}.npy")
        datestring = np.load(path, allow_pickle=True)[index].strftime(self.dateformat)

        features: dict[str, Union[str, Tensor]] = {
            "label": torch.tensor(label, dtype=torch.float32),
            "date": datestring,
        }
        return features

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Raises:
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        # Check if the extracted files already exist
        pathname = os.path.join(self.root, self.data_file_name.format(self.task))
        if os.path.exists(pathname):
            return

        # Check if the zip file has already been downloaded
        pathname = os.path.join(self.root, self.zipfile_name.format(self.task))
        if os.path.exists(pathname):
            self._extract()
            return

        # Check if the user requested to download the dataset
        if not self.download:
            raise RuntimeError(
                f"Dataset not found in `root={self.root}` and `download=False`, "
                "either specify a different `root` directory or use `download=True` "
                "to automatically download the dataset."
            )

        # Download the dataset
        self._download()
        self._extract()

    def _download(self) -> None:
        """Download the dataset and extract it.

        Raises:
            RuntimeError: if download doesn't work correctly or checksums don't match
        """
        download_url(
            self.url.format(self.zipfile_name.format(self.task)),
            self.root,
            filename=self.zipfile_name.format(self.task),
            md5=self.md5[self.task] if self.checksum else None,
        )
        self._extract()

    def _extract(self) -> None:
        """Extract the dataset."""
        zipfile_path = os.path.join(self.root, self.zipfile_name.format(self.task))
        extract_archive(zipfile_path, self.root)
    def plot(
        self,
        sample: dict[str, Any],
        show_titles: bool = True,
        suptitle: Optional[str] = None,
    ) -> Figure:
        """Plot a sample from the dataset.

        In the ``forecast`` task, only the latest image of the time-series is plotted.

        Args:
            sample: a sample returned by :meth:`__getitem__`
            show_titles: flag indicating whether to show titles above each panel
            suptitle: optional suptitle to use for figure

        Returns:
            a matplotlib Figure with the rendered sample
        """
        if self.task == "nowcast":
            image, label = sample["image"].permute(1, 2, 0), sample["label"].item()
        else:
            # channels are stacked time-major as (t c), so unpack the time
            # dimension and keep the last time step for plotting
            image, label = (
                rearrange(sample["image"], "(t c) h w -> t h w c", c=3)[-1],
                sample["label"][-1].item(),
            )

        showing_predictions = "prediction" in sample
        if showing_predictions:
            prediction = sample["prediction"].item()

        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

        ax.imshow(image / 255)
        ax.axis("off")

        if show_titles:
            title = f"Label: {label:.3f}"
            if showing_predictions:
                title += f"\nPrediction: {prediction:.3f}"
            ax.set_title(title)

        if suptitle is not None:
            plt.suptitle(suptitle)

        return fig
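A minimal usage sketch (not part of the module above) follows, assuming the processed HDF5 and ``.npy`` files are available under a local ``data`` directory or that ``download=True`` is permitted to fetch them; the ``root`` path and output filename are placeholders.

# Usage sketch: instantiate the dataset, inspect a sample, and plot it.
from torch.utils.data import DataLoader

from torchgeo.datasets import SKIPPD

dataset = SKIPPD(root="data", split="trainval", task="nowcast", download=True)
print(len(dataset))  # number of (sky image, PV output) pairs in the split

sample = dataset[0]
print(sample["image"].shape)  # [3, 64, 64] for "nowcast", [48, 64, 64] for "forecast"
print(sample["label"], sample["date"])  # PV power output and timestamp string

# For the "forecast" task, the 16-step time series is stored channel-stacked
# as (t c) h w; it can be unpacked back to [16, 3, 64, 64] with einops:
#     rearrange(sample["image"], "(t c) h w -> t c h w", c=3)

# Samples are plain dictionaries, so they work with a standard DataLoader.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Visualize one sample with the dataset's plot helper.
fig = dataset.plot(sample, suptitle="SKIPP'D sample")
fig.savefig("skippd_sample.png")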
