Skip to content

Sentinel AWS Data Connector Documentation

Documentation for the terrakit.download.data_connectors.sentinel_aws data connector module.

terrakit.download.data_connectors.sentinel_aws

Sentinel_AWS

Bases: Connector

Class for interacting with Sentinel AWS data via STAC API.

Attributes:

Name Type Description
connector_type str

Type of data connector, always "sentinel_aws".

stac_url str

Base URL for the STAC API.

collections list

List of available collections.

collections_details dict

Detailed information about collections.

Source code in terrakit/download/data_connectors/sentinel_aws.py
class Sentinel_AWS(Connector):
    """
    Connector for Sentinel data hosted on AWS, queried through a STAC API.

    Attributes:
        connector_type (str): Type of data connector, always "sentinel_aws".
        stac_url (str): Base URL for the STAC API.
        collections (list): List of available collections.
        collections_details: Detailed information about collections, as loaded
            with ``as_json=True``. It is iterated as a sequence of records,
            each of which is read through its "collection_name" key.
    """

    def __init__(self):
        """
        Initialize Sentinel_AWS class with default attributes.

        Both collection attributes are loaded via ``load_and_list_collections``
        for the "sentinel_aws" connector type.
        """
        self.connector_type = "sentinel_aws"
        self.stac_url = "https://earth-search.aws.element84.com/v1/"
        self.collections: list[Any] = load_and_list_collections(
            connector_type="sentinel_aws"
        )
        self.collections_details = load_and_list_collections(
            as_json=True, connector_type="sentinel_aws"
        )

    def list_collections(self) -> list[Any]:
        """
        List available collections.

        Returns:
            list: List of available collections.
        """

        logger.info("Listing available collections")
        return self.collections

    def find_data(
        self,
        data_collection_name: str,
        date_start: str,
        date_end: str,
        area_polygon=None,
        bbox=None,
        bands=[],
        maxcc=100,
        data_connector_spec=None,
    ) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
        """
        Find Sentinel AWS data based on given parameters.

        Args:
            data_collection_name (str): Name of the data collection.
            date_start (str): Start date in 'YYYY-MM-DD' format.
            date_end (str): End date in 'YYYY-MM-DD' format.
            area_polygon (list, optional): Polygon defining the area of interest.
                It is validated here but not forwarded to the STAC search.
            bbox (list, optional): Bounding box defining the area of interest.
            bands (list): List of bands to fetch. The default list object is
                shared between calls, so it must not be mutated.
            maxcc (int, optional): Maximum cloud cover percentage.
            data_connector_spec (dict, optional): Additional data connector specifications.

        Returns:
            tuple: A tuple containing unique dates and STAC items, where each
                item is reduced to a dict with its "id" and "properties".
                ``(None, None)`` is returned if the search yields ``None``.

        Raises:
            TerrakitValueError: If the STAC search raises a ``ValueError``.
        """
        logger.info("Listing Sentinel AWS data")

        check_collection_exists(data_collection_name, self.collections)

        check_start_end_date(date_start=date_start, date_end=date_end)
        check_area_polygon(
            area_polygon=area_polygon, connector_type=self.connector_type
        )
        check_bbox(bbox=bbox, connector_type=self.connector_type)

        collection_details = self._get_collection_info(data_collection_name)
        fields = self._get_search_fields(collection_details)

        try:
            unique_dates, stac_items = find_items(
                self.stac_url,
                bbox,
                date_start,
                date_end,
                bands=bands,
                collections=[data_collection_name],
                limit=250,
                maxcc=maxcc,
                data_connector_spec=data_connector_spec,
                fields=fields,
            )

        except ValueError as e:
            error_msg = f"Unable to find data for collection '{data_collection_name}'. This could be due to the parameters set:\n\t bbox={bbox}, start_date={date_start}, end_date={date_end}, collection={data_collection_name}, fields={fields}, max_cc={maxcc}."
            logger.exception(error_msg)
            raise TerrakitValueError(error_msg) from e

        # The signature declares a (None, None) result; honour it instead of
        # failing with a TypeError when iterating over None below.
        if unique_dates is None or stac_items is None:
            return None, None

        item_summaries = [
            {"id": item.id, "properties": item.properties} for item in stac_items
        ]

        return unique_dates, item_summaries

    def get_data(
        self,
        data_collection_name,
        date_start,
        date_end,
        area_polygon=None,
        bbox=None,
        bands=[],
        maxcc=100,
        data_connector_spec=None,
        save_file=None,
        working_dir=".",
    ) -> Union[xr.DataArray, None]:
        """
        Get Sentinel AWS data based on given parameters.

        Args:
            data_collection_name (str): Name of the data collection.
            date_start (str): Start date in 'YYYY-MM-DD' format.
            date_end (str): End date in 'YYYY-MM-DD' format.
            area_polygon (list, optional): Polygon defining the area of interest.
                Accepted but not used by this method.
            bbox (list, optional): Bounding box defining the area of interest.
            bands (list, optional): List of bands to retrieve. The default list
                object is shared between calls, so it must not be mutated.
            maxcc (int, optional): Maximum cloud cover percentage.
            data_connector_spec (dict, optional): Additional data connector
                specifications. When omitted, the first record in
                ``collections_details`` whose "collection_name" matches is used.
            save_file (str, optional): Path to save the data.
            working_dir (str, optional): Working directory for saving files.
                Accepted but not used by this method.

        Returns:
            xarray.DataArray: The per-date arrays concatenated along "time",
                or None when the search returns no dates.

        Raises:
            TerrakitValueError: If no collection details exist for
                ``data_collection_name``, or if ``find_data`` fails.
        """
        check_collection_exists(data_collection_name, self.collections)
        # Check that the bands the user has requested exist in the data collection
        check_bands(
            connector_type=self.connector_type,
            collection_name=data_collection_name,
            bands=bands,
        )

        if data_connector_spec is None:
            matching_specs = [
                details
                for details in self.collections_details
                if details["collection_name"] == data_collection_name
            ]
            if not matching_specs:
                error_msg = (
                    f"Unable to find collection details for '{data_collection_name}'"
                )
                logger.error(error_msg)
                raise TerrakitValueError(error_msg)
            data_connector_spec = matching_specs[0]

        # TerrakitValueError raised by find_data propagates unchanged.
        unique_dates, _ = self.find_data(
            data_collection_name=data_collection_name,
            date_start=date_start,
            date_end=date_end,
            bbox=bbox,
            bands=bands,
            maxcc=maxcc,
            data_connector_spec=data_connector_spec,
        )

        # Without dates there is nothing to concatenate; return the None
        # allowed by the signature rather than failing inside xr.concat.
        if unique_dates is None or len(unique_dates) == 0:
            logger.warning(
                f"No dates found for collection '{data_collection_name}' between {date_start} and {date_end}."
            )
            return None

        da_list: list[Any] = []
        for date in unique_dates:
            # NOTE(review): every iteration requests the full
            # date_start/date_end range rather than the individual `date` -
            # confirm this is what get_sh_aws_data expects.
            da: xr.DataArray = get_sh_aws_data(
                self.stac_url,
                bbox,
                date_start,
                date_end,
                bands=bands,
                collections=[data_collection_name],
                limit=250,
                maxcc=maxcc,
                data_connector_spec=data_connector_spec,
            )
            date_time_stamp = datetime.strptime(date, "%Y-%m-%d")
            da = da.assign_coords({"band": bands, "time": date_time_stamp})
            da_list.append(da)

        stacked = xr.concat(da_list, dim="time")
        save_data_array_to_file(stacked, save_file)

        return stacked

    def _get_collection_info(self, collection_name) -> dict[str, Any]:
        """
        Look up the stored details record for a collection.

        Args:
            collection_name: Value compared against each record's
                "collection_name" key.

        Returns:
            dict: The matching record from ``collections_details``, or an empty
                dict when nothing matches. If several records match, the last
                one is returned.
        """
        collection_info: dict[str, Any] = {}
        for details in self.collections_details:
            if details["collection_name"] == collection_name:
                collection_info = details
        return collection_info

    def _get_search_fields(self, collection_info: dict[str, Any]) -> str:
        """
        Extract the search "fields" setting from a collection record.

        Args:
            collection_info (dict): Collection record, optionally holding a
                "search" entry that itself holds a "fields" entry.

        Returns:
            str: The configured fields value, or the string "{}" when the
                record does not define one.

        Raises:
            TerrakitValueError: If the configured value is not a string.
        """
        fields = "{}"
        if "search" in collection_info and "fields" in collection_info["search"]:
            fields = collection_info["search"]["fields"]
        if not isinstance(fields, str):
            err_msg = f"'fields' value in collections.json must be a str, not {type(fields)}: {fields}"
            raise TerrakitValueError(err_msg)
        return fields

list_collections

List available collections.

Returns:

Name Type Description
list list[Any]

List of available collections.

Source code in terrakit/download/data_connectors/sentinel_aws.py
def list_collections(self) -> list[Any]:
    """
    Report the collections available through this connector.

    Returns:
        list: The connector's ``collections`` attribute, returned as is.
    """
    logger.info("Listing available collections")
    available_collections = self.collections
    return available_collections

find_data

Find Sentinel AWS data based on given parameters.

Parameters:

Name Type Description Default
data_collection_name str

Name of the data collection.

required
date_start str

Start date in 'YYYY-MM-DD' format.

required
date_end str

End date in 'YYYY-MM-DD' format.

required
bands list

List of bands to fetch.

[]
area_polygon list

Polygon defining the area of interest.

None
bbox list

Bounding box defining the area of interest.

None
maxcc int

Maximum cloud cover percentage.

100
data_connector_spec dict

Additional data connector specifications.

None

Returns:

Name Type Description
tuple Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]

A tuple containing unique dates and STAC items.

Source code in terrakit/download/data_connectors/sentinel_aws.py
def find_data(
    self,
    data_collection_name: str,
    date_start: str,
    date_end: str,
    area_polygon=None,
    bbox=None,
    bands=[],
    maxcc=100,
    data_connector_spec=None,
) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
    """
    Find Sentinel AWS data based on given parameters.

    Args:
        data_collection_name (str): Name of the data collection.
        date_start (str): Start date in 'YYYY-MM-DD' format.
        date_end (str): End date in 'YYYY-MM-DD' format.
        area_polygon (list, optional): Polygon defining the area of interest.
            It is validated here but not forwarded to the STAC search.
        bbox (list, optional): Bounding box defining the area of interest.
        bands (list): List of bands to fetch. The default list object is
            shared between calls, so it must not be mutated.
        maxcc (int, optional): Maximum cloud cover percentage.
        data_connector_spec (dict, optional): Additional data connector specifications.

    Returns:
        tuple: A tuple containing unique dates and STAC items, where each
            item is reduced to a dict with its "id" and "properties".
            ``(None, None)`` is returned if the search yields ``None``.

    Raises:
        TerrakitValueError: If the STAC search raises a ``ValueError``.
    """
    logger.info("Listing Sentinel AWS data")

    check_collection_exists(data_collection_name, self.collections)

    check_start_end_date(date_start=date_start, date_end=date_end)
    check_area_polygon(
        area_polygon=area_polygon, connector_type=self.connector_type
    )
    check_bbox(bbox=bbox, connector_type=self.connector_type)

    collection_details = self._get_collection_info(data_collection_name)
    fields = self._get_search_fields(collection_details)

    try:
        unique_dates, stac_items = find_items(
            self.stac_url,
            bbox,
            date_start,
            date_end,
            bands=bands,
            collections=[data_collection_name],
            limit=250,
            maxcc=maxcc,
            data_connector_spec=data_connector_spec,
            fields=fields,
        )

    except ValueError as e:
        error_msg = f"Unable to find data for collection '{data_collection_name}'. This could be due to the parameters set:\n\t bbox={bbox}, start_date={date_start}, end_date={date_end}, collection={data_collection_name}, fields={fields}, max_cc={maxcc}."
        logger.exception(error_msg)
        raise TerrakitValueError(error_msg) from e

    # The signature declares a (None, None) result; honour it instead of
    # failing with a TypeError when iterating over None below.
    if unique_dates is None or stac_items is None:
        return None, None

    item_summaries = [
        {"id": item.id, "properties": item.properties} for item in stac_items
    ]

    return unique_dates, item_summaries

get_data

Get Sentinel AWS data based on given parameters.

Parameters:

Name Type Description Default
data_collection_name str

Name of the data collection.

required
date_start str

Start date in 'YYYY-MM-DD' format.

required
date_end str

End date in 'YYYY-MM-DD' format.

required
area_polygon list

Polygon defining the area of interest.

None
bbox list

Bounding box defining the area of interest.

None
bands list

List of bands to retrieve.

[]
maxcc int

Maximum cloud cover percentage.

100
data_connector_spec dict

Additional data connector specifications.

None
save_file str

Path to save the data.

None
working_dir str

Working directory for saving files.

'.'

Returns:

Name Type Description
xarray Union[DataArray, None]

An xarray DataArray containing the fetched data with dimensions (time, band, y, x).

Source code in terrakit/download/data_connectors/sentinel_aws.py
def get_data(
    self,
    data_collection_name,
    date_start,
    date_end,
    area_polygon=None,
    bbox=None,
    bands=[],
    maxcc=100,
    data_connector_spec=None,
    save_file=None,
    working_dir=".",
) -> Union[xr.DataArray, None]:
    """
    Get Sentinel AWS data based on given parameters.

    Args:
        data_collection_name (str): Name of the data collection.
        date_start (str): Start date in 'YYYY-MM-DD' format.
        date_end (str): End date in 'YYYY-MM-DD' format.
        area_polygon (list, optional): Polygon defining the area of interest.
            Accepted but not used by this method.
        bbox (list, optional): Bounding box defining the area of interest.
        bands (list, optional): List of bands to retrieve. The default list
            object is shared between calls, so it must not be mutated.
        maxcc (int, optional): Maximum cloud cover percentage.
        data_connector_spec (dict, optional): Additional data connector
            specifications. When omitted, the first record in
            ``collections_details`` whose "collection_name" matches is used.
        save_file (str, optional): Path to save the data.
        working_dir (str, optional): Working directory for saving files.
            Accepted but not used by this method.

    Returns:
        xarray.DataArray: The per-date arrays concatenated along "time",
            or None when the search returns no dates.

    Raises:
        TerrakitValueError: If no collection details exist for
            ``data_collection_name``, or if ``find_data`` fails.
    """
    check_collection_exists(data_collection_name, self.collections)
    # Check that the bands the user has requested exist in the data collection
    check_bands(
        connector_type=self.connector_type,
        collection_name=data_collection_name,
        bands=bands,
    )

    if data_connector_spec is None:
        matching_specs = [
            details
            for details in self.collections_details
            if details["collection_name"] == data_collection_name
        ]
        if not matching_specs:
            error_msg = (
                f"Unable to find collection details for '{data_collection_name}'"
            )
            logger.error(error_msg)
            raise TerrakitValueError(error_msg)
        data_connector_spec = matching_specs[0]

    # TerrakitValueError raised by find_data propagates unchanged.
    unique_dates, _ = self.find_data(
        data_collection_name=data_collection_name,
        date_start=date_start,
        date_end=date_end,
        bbox=bbox,
        bands=bands,
        maxcc=maxcc,
        data_connector_spec=data_connector_spec,
    )

    # Without dates there is nothing to concatenate; return the None
    # allowed by the signature rather than failing inside xr.concat.
    if unique_dates is None or len(unique_dates) == 0:
        logger.warning(
            f"No dates found for collection '{data_collection_name}' between {date_start} and {date_end}."
        )
        return None

    da_list: list[Any] = []
    for date in unique_dates:
        # NOTE(review): every iteration requests the full
        # date_start/date_end range rather than the individual `date` -
        # confirm this is what get_sh_aws_data expects.
        da: xr.DataArray = get_sh_aws_data(
            self.stac_url,
            bbox,
            date_start,
            date_end,
            bands=bands,
            collections=[data_collection_name],
            limit=250,
            maxcc=maxcc,
            data_connector_spec=data_connector_spec,
        )
        date_time_stamp = datetime.strptime(date, "%Y-%m-%d")
        da = da.assign_coords({"band": bands, "time": date_time_stamp})
        da_list.append(da)

    stacked = xr.concat(da_list, dim="time")
    save_data_array_to_file(stacked, save_file)

    return stacked