Skip to content

Sentinel AWS Data Connector Documentation

Documentation for the terrakit.download.data_connectors.sentinel_aws data connector module.

terrakit.download.data_connectors.sentinel_aws

Sentinel_AWS

Bases: Connector

Class for interacting with Sentinel AWS data via STAC API.

Attributes:

Name Type Description
connector_type str

Type of data connector, always "sentinel_aws".

stac_url str

Base URL for the STAC API.

collections list

List of available collections.

collections_details dict

Detailed information about collections.

Source code in terrakit/download/data_connectors/sentinel_aws.py
class Sentinel_AWS(Connector):
    """
    Class for interacting with Sentinel AWS data via STAC API.

    Attributes:
        connector_type (str): Type of data connector, always "sentinel_aws".
        stac_url (str): Base URL for the STAC API.
        collections (list): List of available collections.
        collections_details (dict): Detailed information about collections.
    """

    def __init__(self):
        """
        Initialize Sentinel_AWS class with default attributes.
        """
        self.connector_type = "sentinel_aws"
        self.stac_url = "https://earth-search.aws.element84.com/v1/"
        # Collection names and their full metadata come from the shared
        # collections registry for this connector type.
        self.collections: list[Any] = load_and_list_collections(
            connector_type="sentinel_aws"
        )
        self.collections_details = load_and_list_collections(
            as_json=True, connector_type="sentinel_aws"
        )

    def list_collections(self) -> list[Any]:
        """
        List available collections.

        Returns:
            list: List of available collections.
        """
        logger.info("Listing available collections")
        return self.collections

    def find_data(
        self,
        data_collection_name: str,
        date_start: str,
        date_end: str,
        area_polygon=None,
        bbox=None,
        bands=None,
        maxcc=100,
        data_connector_spec=None,
    ) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
        """
        Find Sentinel AWS data based on given parameters.

        Args:
            data_collection_name (str): Name of the data collection.
            date_start (str): Start date in 'YYYY-MM-DD' format.
            date_end (str): End date in 'YYYY-MM-DD' format.
            area_polygon (list, optional): Polygon defining the area of interest.
            bbox (list, optional): Bounding box defining the area of interest.
            bands (list, optional): List of bands to fetch. Defaults to an empty list.
            maxcc (int, optional): Maximum cloud cover percentage.
            data_connector_spec (dict, optional): Additional data connector specifications.

        Returns:
            tuple: A tuple containing unique dates and STAC items.

        Raises:
            TerrakitValueError: If the STAC search fails for the given parameters.
        """
        # Avoid the shared-mutable-default pitfall (`bands=[]`).
        bands = [] if bands is None else bands

        logger.info("Listing Sentinel AWS data")

        check_collection_exists(data_collection_name, self.collections)

        check_start_end_date(date_start=date_start, date_end=date_end)
        check_area_polygon(
            area_polygon=area_polygon, connector_type=self.connector_type
        )
        check_bbox(bbox=bbox, connector_type=self.connector_type)

        collection_details = self._get_collection_info(data_collection_name)
        fields = self._get_search_fields(collection_details)

        try:
            unique_dates, stac_items = find_items(
                self.stac_url,
                bbox,
                date_start,
                date_end,
                bands=bands,
                collections=[data_collection_name],
                limit=250,
                maxcc=maxcc,
                data_connector_spec=data_connector_spec,
                fields=fields,
            )

        except ValueError as e:
            # Fixed: the collection name previously had an unbalanced quote.
            error_msg = f"Unable to find data for collection '{data_collection_name}'. This could be due to the parameters set:\n\t bbox={bbox}, start_date={date_start}, end_date={date_end}, collection={data_collection_name}, fields={fields}, max_cc={maxcc}."
            logger.exception(error_msg)
            raise TerrakitValueError(error_msg) from e

        # Reduce STAC items to a serializable summary of id + properties.
        stac_items = [
            {"id": item.id, "properties": item.properties} for item in stac_items
        ]

        return unique_dates, stac_items

    def get_data(
        self,
        data_collection_name,
        date_start,
        date_end,
        area_polygon=None,
        bbox=None,
        bands=None,
        maxcc=100,
        data_connector_spec=None,
        save_file=None,
        working_dir=".",
    ) -> Union[xr.DataArray, None]:
        """
        Get Sentinel AWS data based on given parameters.

        Args:
            data_collection_name (str): Name of the data collection.
            date_start (str): Start date in 'YYYY-MM-DD' format.
            date_end (str): End date in 'YYYY-MM-DD' format.
            area_polygon (list, optional): Polygon defining the area of interest.
            bbox (list, optional): Bounding box defining the area of interest.
            bands (list, optional): List of bands to retrieve. Defaults to an empty list.
            maxcc (int, optional): Maximum cloud cover percentage.
            data_connector_spec (dict, optional): Additional data connector specifications.
            save_file (str, optional): Path to save the data. If provided, individual GeoTIFF files
                will be saved for each date with the naming pattern: {save_file}_{date}.tif. Each file
                contains all requested bands for that specific date. If None, no files are saved to disk. Defaults to None.
            working_dir (str, optional): Working directory for saving files.
                NOTE(review): currently unused by this method.

        Returns:
            xarray.DataArray: An xarray DataArray containing all fetched data with dimensions (time, band, y, x).
                All dates are stacked along the time dimension, and all bands are stacked along the band dimension.
                If save_file is provided, individual date files are also saved to disk.

        Raises:
            TerrakitValueError: If the collection details cannot be found or the search fails.
        """
        # Avoid the shared-mutable-default pitfall (`bands=[]`).
        bands = [] if bands is None else bands

        check_collection_exists(data_collection_name, self.collections)
        # Check that the bands the user has requested exist in the data collection
        check_bands(
            connector_type=self.connector_type,
            collection_name=data_collection_name,
            bands=bands,
        )

        if data_connector_spec is None:
            # Fall back to the registry entry for this collection.
            matching_specs = [
                spec
                for spec in self.collections_details
                if spec["collection_name"] == data_collection_name
            ]
            if not matching_specs:
                error_msg = (
                    f"Unable to find collection details for '{data_collection_name}'"
                )
                logger.error(error_msg)
                raise TerrakitValueError(error_msg)
            data_connector_spec = matching_specs[0]

        # A TerrakitValueError raised here propagates unchanged to the caller
        # (the previous try/except that re-raised the same exception was a no-op).
        unique_dates, _ = self.find_data(
            data_collection_name=data_collection_name,
            date_start=date_start,
            date_end=date_end,
            bbox=bbox,
            bands=bands,
            maxcc=maxcc,
            data_connector_spec=data_connector_spec,
        )

        da_list: list[Any] = []
        for date in unique_dates:  # type: ignore[union-attr]
            # NOTE(review): each iteration fetches the full date_start..date_end
            # range rather than just `date` — confirm this is intentional.
            da: xr.DataArray = get_sh_aws_data(
                self.stac_url,
                bbox,
                date_start,
                date_end,
                bands=bands,
                collections=[data_collection_name],
                limit=250,
                maxcc=maxcc,
                data_connector_spec=data_connector_spec,
            )
            date_time_stamp = datetime.strptime(date, "%Y-%m-%d")
            da = da.assign_coords({"band": bands, "time": date_time_stamp})
            da_list.append(da)

        # Stack the per-date arrays along the time dimension.
        da = xr.concat(da_list, dim="time")
        save_data_array_to_file(da, save_file)

        return da

    def _get_collection_info(self, collection_name) -> dict[str, Any]:
        """Return the registry entry for `collection_name`, or {} if absent.

        The last matching entry wins, preserving the original lookup behavior.
        """
        collection_info: dict[str, Any] = {}
        for details in self.collections_details:
            if details["collection_name"] == collection_name:
                collection_info = details
        return collection_info

    def _get_search_fields(self, collection_info: dict[str, Any]) -> str:
        """Extract the STAC search `fields` string from a collection entry.

        Returns "{}" when no search fields are configured.

        Raises:
            TerrakitValueError: If the configured value is not a string.
        """
        fields = "{}"
        if "search" in collection_info:
            if "fields" in collection_info["search"]:
                fields = collection_info["search"]["fields"]
        if not isinstance(fields, str):
            err_msg = f"'fields' value in collections.json must be a str, not {type(fields)}: {fields}"
            raise TerrakitValueError(err_msg)
        return fields

list_collections

List available collections.

Returns:

Name Type Description
list list[Any]

List of available collections.

Source code in terrakit/download/data_connectors/sentinel_aws.py
def list_collections(self) -> list[Any]:
    """
    Return the collections this connector can serve.

    Returns:
        list: List of available collections, as loaded at construction time.
    """
    # Log the lookup, then hand back the cached collection list unchanged.
    logger.info("Listing available collections")
    return self.collections

find_data

Find Sentinel AWS data based on given parameters.

Parameters:

Name Type Description Default
data_collection_name str

Name of the data collection.

required
date_start str

Start date in 'YYYY-MM-DD' format.

required
date_end str

End date in 'YYYY-MM-DD' format.

required
bands list

List of bands to fetch.

[]
area_polygon list

Polygon defining the area of interest.

None
bbox list

Bounding box defining the area of interest.

None
maxcc int

Maximum cloud cover percentage.

100
data_connector_spec dict

Additional data connector specifications.

None

Returns:

Name Type Description
tuple Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]

A tuple containing unique dates and STAC items.

Source code in terrakit/download/data_connectors/sentinel_aws.py
def find_data(
    self,
    data_collection_name: str,
    date_start: str,
    date_end: str,
    area_polygon=None,
    bbox=None,
    bands=None,
    maxcc=100,
    data_connector_spec=None,
) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
    """
    Find Sentinel AWS data based on given parameters.

    Args:
        data_collection_name (str): Name of the data collection.
        date_start (str): Start date in 'YYYY-MM-DD' format.
        date_end (str): End date in 'YYYY-MM-DD' format.
        area_polygon (list, optional): Polygon defining the area of interest.
        bbox (list, optional): Bounding box defining the area of interest.
        bands (list, optional): List of bands to fetch. Defaults to an empty list.
        maxcc (int, optional): Maximum cloud cover percentage.
        data_connector_spec (dict, optional): Additional data connector specifications.

    Returns:
        tuple: A tuple containing unique dates and STAC items.

    Raises:
        TerrakitValueError: If the STAC search fails for the given parameters.
    """
    # Avoid the shared-mutable-default pitfall (`bands=[]`).
    bands = [] if bands is None else bands

    logger.info("Listing Sentinel AWS data")

    check_collection_exists(data_collection_name, self.collections)

    check_start_end_date(date_start=date_start, date_end=date_end)
    check_area_polygon(
        area_polygon=area_polygon, connector_type=self.connector_type
    )
    check_bbox(bbox=bbox, connector_type=self.connector_type)

    collection_details = self._get_collection_info(data_collection_name)
    fields = self._get_search_fields(collection_details)

    try:
        unique_dates, stac_items = find_items(
            self.stac_url,
            bbox,
            date_start,
            date_end,
            bands=bands,
            collections=[data_collection_name],
            limit=250,
            maxcc=maxcc,
            data_connector_spec=data_connector_spec,
            fields=fields,
        )

    except ValueError as e:
        # Fixed: the collection name previously had an unbalanced quote.
        error_msg = f"Unable to find data for collection '{data_collection_name}'. This could be due to the parameters set:\n\t bbox={bbox}, start_date={date_start}, end_date={date_end}, collection={data_collection_name}, fields={fields}, max_cc={maxcc}."
        logger.exception(error_msg)
        raise TerrakitValueError(error_msg) from e

    # Reduce STAC items to a serializable summary of id + properties.
    stac_items = [
        {"id": item.id, "properties": item.properties} for item in stac_items
    ]

    return unique_dates, stac_items

get_data

Get Sentinel AWS data based on given parameters.

Parameters:

Name Type Description Default
data_collection_name str

Name of the data collection.

required
date_start str

Start date in 'YYYY-MM-DD' format.

required
date_end str

End date in 'YYYY-MM-DD' format.

required
area_polygon list

Polygon defining the area of interest.

None
bbox list

Bounding box defining the area of interest.

None
bands list

List of bands to retrieve.

[]
maxcc int

Maximum cloud cover percentage.

100
data_connector_spec dict

Additional data connector specifications.

None
save_file str

Path to save the data. If provided, individual GeoTIFF files will be saved for each date with the naming pattern: {save_file}_{date}.tif. Each file contains all requested bands for that specific date. If None, no files are saved to disk. Defaults to None.

None
working_dir str

Working directory for saving files.

'.'

Returns:

Type Description
Union[DataArray, None]

xarray.DataArray: An xarray DataArray containing all fetched data with dimensions (time, band, y, x). All dates are stacked along the time dimension, and all bands are stacked along the band dimension. If save_file is provided, individual date files are also saved to disk.

Source code in terrakit/download/data_connectors/sentinel_aws.py
def get_data(
    self,
    data_collection_name,
    date_start,
    date_end,
    area_polygon=None,
    bbox=None,
    bands=None,
    maxcc=100,
    data_connector_spec=None,
    save_file=None,
    working_dir=".",
) -> Union[xr.DataArray, None]:
    """
    Get Sentinel AWS data based on given parameters.

    Args:
        data_collection_name (str): Name of the data collection.
        date_start (str): Start date in 'YYYY-MM-DD' format.
        date_end (str): End date in 'YYYY-MM-DD' format.
        area_polygon (list, optional): Polygon defining the area of interest.
        bbox (list, optional): Bounding box defining the area of interest.
        bands (list, optional): List of bands to retrieve. Defaults to an empty list.
        maxcc (int, optional): Maximum cloud cover percentage.
        data_connector_spec (dict, optional): Additional data connector specifications.
        save_file (str, optional): Path to save the data. If provided, individual GeoTIFF files
            will be saved for each date with the naming pattern: {save_file}_{date}.tif. Each file
            contains all requested bands for that specific date. If None, no files are saved to disk. Defaults to None.
        working_dir (str, optional): Working directory for saving files.
            NOTE(review): currently unused by this method.

    Returns:
        xarray.DataArray: An xarray DataArray containing all fetched data with dimensions (time, band, y, x).
            All dates are stacked along the time dimension, and all bands are stacked along the band dimension.
            If save_file is provided, individual date files are also saved to disk.

    Raises:
        TerrakitValueError: If the collection details cannot be found or the search fails.
    """
    # Avoid the shared-mutable-default pitfall (`bands=[]`).
    bands = [] if bands is None else bands

    check_collection_exists(data_collection_name, self.collections)
    # Check that the bands the user has requested exist in the data collection
    check_bands(
        connector_type=self.connector_type,
        collection_name=data_collection_name,
        bands=bands,
    )

    if data_connector_spec is None:
        # Fall back to the registry entry for this collection.
        matching_specs = [
            spec
            for spec in self.collections_details
            if spec["collection_name"] == data_collection_name
        ]
        if not matching_specs:
            error_msg = (
                f"Unable to find collection details for '{data_collection_name}'"
            )
            logger.error(error_msg)
            raise TerrakitValueError(error_msg)
        data_connector_spec = matching_specs[0]

    # A TerrakitValueError raised here propagates unchanged to the caller
    # (the previous try/except that re-raised the same exception was a no-op).
    unique_dates, _ = self.find_data(
        data_collection_name=data_collection_name,
        date_start=date_start,
        date_end=date_end,
        bbox=bbox,
        bands=bands,
        maxcc=maxcc,
        data_connector_spec=data_connector_spec,
    )

    da_list: list[Any] = []
    for date in unique_dates:  # type: ignore[union-attr]
        # NOTE(review): each iteration fetches the full date_start..date_end
        # range rather than just `date` — confirm this is intentional.
        da: xr.DataArray = get_sh_aws_data(
            self.stac_url,
            bbox,
            date_start,
            date_end,
            bands=bands,
            collections=[data_collection_name],
            limit=250,
            maxcc=maxcc,
            data_connector_spec=data_connector_spec,
        )
        date_time_stamp = datetime.strptime(date, "%Y-%m-%d")
        da = da.assign_coords({"band": bands, "time": date_time_stamp})
        da_list.append(da)

    # Stack the per-date arrays along the time dimension.
    da = xr.concat(da_list, dim="time")
    save_data_array_to_file(da, save_file)

    return da