Source code for torchgeo.datasets.inaturalist

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Dataset for iNaturalist."""

import glob
import os
import sys
from typing import Any

import pandas as pd
from rasterio.crs import CRS

from .geo import GeoDataset
from .utils import BoundingBox, disambiguate_timestamp

class INaturalist(GeoDataset):
    """Dataset for iNaturalist.

    `iNaturalist <https://www.inaturalist.org/>`__ is a joint initiative of the
    California Academy of Sciences and the National Geographic Society. It allows
    citizen scientists to upload observations of organisms that can be downloaded by
    scientists and researchers.

    If you use an iNaturalist dataset in your research, please cite it according to:

    * https://www.inaturalist.org/pages/help#cite

    .. versionadded:: 0.3
    """

    res = 0
    _crs = CRS.from_epsg(4326)  # Lat/Lon

    def __init__(self, root: str = "data") -> None:
        """Initialize a new Dataset instance.

        Args:
            root: root directory where dataset can be found

        Raises:
            FileNotFoundError: if no files are found in ``root``
        """
        super().__init__()

        self.root = root

        # glob is called without ``recursive=True``, so only CSV files located
        # directly inside ``root`` are matched.
        files = glob.glob(os.path.join(root, "**.csv"))
        if not files:
            raise FileNotFoundError(f"Dataset not found in `root={self.root}`")

        # Read CSV file. Only the first match is loaded; any additional CSV
        # files in ``root`` are ignored.
        data = pd.read_csv(
            files[0],
            engine="c",
            usecols=["observed_on", "time_observed_at", "latitude", "longitude"],
        )

        # Dataset contains many possible timestamps:
        #
        # * observed_on_string: no consistent format (can't use)
        # * observed_on: day precision (better)
        # * time_observed_at: second precision (best)
        # * created_at: when observation was submitted (shouldn't use)
        # * updated_at: when submission was updated (shouldn't use)
        #
        # The created_at/updated_at timestamps can be years after the actual
        # submission, so they shouldn't be used, even if
        # observed_on/time_observed_at are missing.

        # Convert from pandas DataFrame to rtree Index.
        # ``i`` counts inserted entries only, so index ids stay contiguous even
        # when rows are skipped.
        i = 0
        for date, time, y, x in data.itertuples(index=False, name=None):
            # Skip rows without lat/lon
            if pd.isna(y) or pd.isna(x):
                continue

            # Prefer the most precise timestamp that is available.
            if not pd.isna(time):
                mint, maxt = disambiguate_timestamp(time, "%Y-%m-%d %H:%M:%S %z")
            elif not pd.isna(date):
                mint, maxt = disambiguate_timestamp(date, "%Y-%m-%d")
            else:
                # No usable timestamp: span the widest representable interval
                # so the row matches a query over any time range.
                mint, maxt = 0, sys.maxsize

            # Each observation is a single point: min and max coincide in x and y.
            coords = (x, x, y, y, mint, maxt)
            self.index.insert(i, coords)
            i += 1

    def __getitem__(self, query: BoundingBox) -> dict[str, Any]:
        """Retrieve metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of metadata at that index

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        bboxes = [hit.bbox for hit in hits]

        if not bboxes:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        # NOTE(review): assumes the GeoDataset base class exposes ``crs`` for
        # the ``_crs`` declared on this class -- confirm against geo.py.
        sample = {"crs": self.crs, "bbox": bboxes}

        return sample

© Copyright 2021, Microsoft Corporation. Revision 6694cbd4.

Built with Sphinx using a theme provided by Read the Docs.
Read the Docs v: stable
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.


Access comprehensive developer documentation for PyTorch

View Docs


Get in-depth tutorials for beginners and advanced developers

View Tutorials


Find development resources and get your questions answered

View Resources