Skip to content

Sentinel Hub Data Connector Documentation

Documentation for the terrakit.download.data_connectors.sentinelhub data connector module.

terrakit.download.data_connectors.sentinelhub

SentinelHub

Bases: Connector

A class to interact with Sentinel Hub data services.

Attributes:

Name Type Description
collections list

A list of available collections.

collections_details list

Detailed information about the collections.

sh_config SHConfig

Configuration settings for Sentinel Hub.

Source code in terrakit/download/data_connectors/sentinelhub.py
class SentinelHub(Connector):
    """
    A class to interact with Sentinel Hub data services.

    Attributes:
        collections (list): A list of available collection names.
        collections_details (list): Detailed information about the collections.
        sh_config (SHConfig): Configuration settings for Sentinel Hub.
    """

    def __init__(self):
        """
        Initialize SentinelHub with collections and configuration.
        """
        self.connector_type = "sentinelhub"
        # Plain names (for validation) and full specs (for building requests),
        # both loaded from the same registry.
        self.collections: list[Any] = load_and_list_collections(
            connector_type="sentinelhub"
        )
        self.collections_details = load_and_list_collections(
            as_json=True, connector_type="sentinelhub"
        )
        self.sh_config = SHConfig()

    @staticmethod
    def _check_credentials() -> None:
        """
        Ensure both Sentinel Hub credentials are present in the environment.

        Raises:
            TerrakitValidationError: If either credential is missing. (The
                previous check used ``and`` and therefore only fired when
                *both* variables were absent.)
        """
        if "SH_CLIENT_ID" not in os.environ or "SH_CLIENT_SECRET" not in os.environ:
            error_msg = "Error: Missing credentials 'SH_CLIENT_ID' and 'SH_CLIENT_SECRET'. Please update .env with correct credentials."
            logger.warning(error_msg)
            raise TerrakitValidationError(message=error_msg)

    def _get_collection_details(self, data_collection_name):
        """
        Return the spec dict for *data_collection_name* from collections_details.

        Raises:
            TerrakitValueError: If no matching collection spec is found.
                (Was a bare ValueError; TerrakitValueError matches the
                connector's documented error contract.)
        """
        matches = [
            X
            for X in self.collections_details
            if X["collection_name"] == data_collection_name
        ]
        if not matches:
            error_msg = (
                f"Unable to find collection details for '{data_collection_name}'"
            )
            logger.error(error_msg)
            raise TerrakitValueError(error_msg)
        return matches[0]

    def list_collections(self) -> list[Any]:
        """
        Lists the available collections.

        Returns:
            list: A list of collection names.
        """
        logger.info("Listing available collections")
        return self.collections

    def find_data(
        self,
        data_collection_name: str,
        date_start: str,
        date_end: str,
        area_polygon=None,
        bbox=None,
        bands=None,
        maxcc=100,
        data_connector_spec=None,
    ) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
        """
        Retrieve unique acquisition dates and search results from a Sentinel Hub data collection.

        Parameters:
            data_collection_name (str): The name of the Sentinel Hub data collection to search.
            date_start (str): The start date for the time interval in 'YYYY-MM-DD' format.
            date_end (str): The end date for the time interval in 'YYYY-MM-DD' format.
            area_polygon (Polygon, optional): A polygon defining the area of interest.
            bbox (tuple, optional): A bounding box defining the area of interest in the format (minx, miny, maxx, maxy).
            bands (list, optional): Accepted for interface symmetry with get_data; not used by the search. Defaults to None.
            maxcc (int, optional): The maximum cloud cover percentage for the data. Default is 100 (no cloud cover filter).
            data_connector_spec (dict, optional): The data connector specification; looked up from collection details when None.

        Returns:
            tuple: A sorted list of unique dates and a list of data results,
                or (None, None) if authentication fails.

        Raises:
            TerrakitValidationError: If credentials are missing.
            TerrakitValueError: If inputs are invalid or neither bbox nor area_polygon is given.
        """
        self._check_credentials()

        # Validate inputs before touching the network.
        check_collection_exists(data_collection_name, self.collections)
        check_start_end_date(date_start=date_start, date_end=date_end)
        check_area_polygon(
            area_polygon=area_polygon, connector_type=self.connector_type
        )
        check_bbox(bbox=bbox, connector_type=self.connector_type)

        if data_connector_spec is None:
            data_connector_spec = self._get_collection_details(data_collection_name)

        # NOTE(security): the spec's "data_collection" field is evaluated as
        # Python. Specs ship with the package; never evaluate untrusted specs.
        data_collection = eval(data_connector_spec["data_collection"])

        self.sh_config.sh_base_url = data_collection.service_url
        logger.info(self.sh_config.sh_base_url)
        dataset_catalog = SentinelHubCatalog(config=self.sh_config)

        if "filter" in data_connector_spec["search"]:
            filter_string = data_connector_spec["search"]["filter"]
            # Substitute request inputs (and the caller-supplied maxcc) into
            # the filter template via plain string replacement.
            for X in data_connector_spec["request_input_data"]:
                if X == "maxcc":
                    filter_string = filter_string.replace(X, str(maxcc))
                else:
                    filter_string = filter_string.replace(
                        X, str(data_connector_spec["request_input_data"][X])
                    )
        else:
            filter_string = ""

        if "fields" in data_connector_spec["search"]:
            # NOTE(security): also evaluated from the bundled spec.
            fields_dict = eval(data_connector_spec["search"]["fields"])
        else:
            fields_dict = {"include": ["id", "properties.datetime"], "exclude": []}

        time_interval = date_start, date_end

        if bbox is not None:
            aoi_bbox = BBox(bbox=bbox, crs=CRS.WGS84)

            search_iterator = dataset_catalog.search(
                data_collection,
                bbox=aoi_bbox,
                time=time_interval,
                filter=filter_string,
                fields=fields_dict,
            )
        elif area_polygon is not None:
            search_iterator = dataset_catalog.search(
                data_collection,
                intersects=area_polygon,
                time=time_interval,
                filter=filter_string,
                fields=fields_dict,
            )
        else:
            error_msg = f"Error: Issue finding data from {self.connector_type}. Please specify at least one of 'bbox' and 'area_polygon'"
            logger.error(error_msg)
            raise TerrakitValueError(error_msg)

        try:
            results = list(search_iterator)
        except InvalidClientError as e:
            error_msg = (
                f"Error: Issue authenticating. Check credentials are up to date.{e}"
            )
            logger.error(error_msg)
            return None, None

        # First 10 chars of the ISO datetime are the YYYY-MM-DD date.
        unique_dates = sorted({X["properties"]["datetime"][0:10] for X in results})
        return unique_dates, results

    def get_data(
        self,
        data_collection_name,
        date_start,
        date_end,
        area_polygon=None,
        bbox=None,
        bands=None,
        maxcc=100,
        data_connector_spec=None,
        save_file=None,
        working_dir=".",
    ) -> Union[xr.DataArray, None]:
        """
        Fetches data from SentinelHub for the specified collection, date range, area, and bands.

        Parameters:
            data_collection_name (str): Name of the data collection to fetch data from.
            date_start (str): Start date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
            date_end (str): End date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
            area_polygon (list, optional): Polygon defining the area of interest. Defaults to None.
                NOTE(review): currently not forwarded to the search or download — only bbox is used; confirm intent.
            bbox (list, optional): Bounding box defining the area of interest. Defaults to None.
            bands (list, optional): List of bands to retrieve. Defaults to None (all bands).
            maxcc (int, optional): Maximum cloud cover threshold (0-100). Defaults to 100.
            data_connector_spec (dict, optional): Data connector specification. Defaults to None.
            save_file (str, optional): Path to save the output file. Defaults to None.
            working_dir (str, optional): Working directory for temporary files. Defaults to '.'.

        Returns:
            xr.DataArray: The fetched data with dimensions (time, band, y, x),
                or None if nothing was found or authentication failed.

        Raises:
            TerrakitValidationError: If credentials are missing.
            TerrakitValueError: If the collection name is invalid.
        """
        self._check_credentials()
        bands = [] if bands is None else bands

        # Check data_collection_name exists in self.collections.
        if data_collection_name not in self.collections:
            error_msg = f"Invalid collection '{data_collection_name}'. Please choose from one of the following collection {self.collections}"
            logger.warning(error_msg)
            raise TerrakitValueError(error_msg)

        logger.info(bands)
        # Honor a caller-supplied spec (it was previously ignored) and fall
        # back to the registry lookup otherwise.
        collection_details = (
            data_connector_spec
            if data_connector_spec is not None
            else self._get_collection_details(data_collection_name)
        )

        unique_dates, res = self.find_data(
            data_collection_name,
            date_start,
            date_end,
            bbox=bbox,
            maxcc=maxcc,
            data_connector_spec=collection_details,
        )

        # find_data returns (None, None) when authentication failed.
        if unique_dates is None or res is None:
            logger.warning("Warning: Unique dates and find_data results are None")
            return None

        if not unique_dates:
            logger.warning(
                f"No data found for the specified date range {date_start}:{date_end}. Unique dates: {unique_dates}"
            )
            return None

        da_list = []
        logger.info(f"The following unique dates were found: {unique_dates}")
        sh_data_dir = f"{working_dir}/sh_data"
        for udate in unique_dates:
            # Per-date output name, e.g. out.tif -> out_2024-01-01.tif.
            usave_file = (
                save_file.replace(".tif", f"_{udate}.tif")
                if save_file is not None
                else None
            )

            da: xr.DataArray = sh_get_data(
                self.sh_config,
                collection_details,
                bbox,
                udate,
                bands,
                usave_file,
                sh_data_dir=sh_data_dir,
            )

            da_list.append(da)

        logger.info("Concatenating data...")
        da = xr.concat(da_list, dim="time")

        # Save combined array to file.
        # NOTE(review): presumably the helper is a no-op for save_file=None — confirm.
        save_data_array_to_file(da, save_file)

        # Clean up the temporary download directory (use the module logger,
        # not the root logging module, for consistency).
        logger.info(f"Removing dir {sh_data_dir}")
        shutil.rmtree(sh_data_dir, ignore_errors=True)

        return da

list_collections

Lists the available collections.

Returns:

Name Type Description
list list[Any]

A list of collection names.

Source code in terrakit/download/data_connectors/sentinelhub.py
def list_collections(self) -> list[Any]:
    """
    Return the names of all collections this connector exposes.

    Returns:
        list: A list of collection names.
    """
    logger.info("Listing available collections")
    available = self.collections
    return available

find_data

This function retrieves unique dates and corresponding data results from a specified Sentinel Hub data collection.

Parameters:

Name Type Description Default
data_collection_name str

The name of the Sentinel Hub data collection to search.

required
date_start str

The start date for the time interval in 'YYYY-MM-DD' format.

required
date_end str

The end date for the time interval in 'YYYY-MM-DD' format.

required
area_polygon Polygon

A polygon defining the area of interest.

None
bbox tuple

A bounding box defining the area of interest in the format (minx, miny, maxx, maxy).

None
bands list

A list of bands to retrieve. Defaults to [].

[]
maxcc int

The maximum cloud cover percentage for the data. Default is 100 (no cloud cover filter).

100
data_connector_spec dict

A dictionary containing the data connector specification.

None

Returns:

Name Type Description
tuple Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]

A tuple containing a sorted list of unique dates and a list of data results.

Raises:

Type Description
TerrakitValidationError

If a validation error occurs.

TerrakitValueError

If a value error occurs.

Source code in terrakit/download/data_connectors/sentinelhub.py
def find_data(
    self,
    data_collection_name: str,
    date_start: str,
    date_end: str,
    area_polygon=None,
    bbox=None,
    bands=None,
    maxcc=100,
    data_connector_spec=None,
) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
    """
    Retrieve unique acquisition dates and search results from a Sentinel Hub data collection.

    Parameters:
        data_collection_name (str): The name of the Sentinel Hub data collection to search.
        date_start (str): The start date for the time interval in 'YYYY-MM-DD' format.
        date_end (str): The end date for the time interval in 'YYYY-MM-DD' format.
        area_polygon (Polygon, optional): A polygon defining the area of interest.
        bbox (tuple, optional): A bounding box defining the area of interest in the format (minx, miny, maxx, maxy).
        bands (list, optional): Accepted for interface symmetry with get_data; not used by the search. Defaults to None.
        maxcc (int, optional): The maximum cloud cover percentage for the data. Default is 100 (no cloud cover filter).
        data_connector_spec (dict, optional): The data connector specification; looked up from collection details when None.

    Returns:
        tuple: A sorted list of unique dates and a list of data results,
            or (None, None) if authentication fails.

    Raises:
        TerrakitValidationError: If credentials are missing.
        TerrakitValueError: If inputs are invalid or neither bbox nor area_polygon is given.
    """
    # Either credential missing is an error. (The previous check used ``and``
    # and therefore only fired when *both* variables were absent.)
    if "SH_CLIENT_ID" not in os.environ or "SH_CLIENT_SECRET" not in os.environ:
        raise TerrakitValidationError(
            message="Error: Missing credentials 'SH_CLIENT_ID' and 'SH_CLIENT_SECRET'. Please update .env with correct credentials."
        )

    # Check data_collection_name exists in self.collections.
    check_collection_exists(data_collection_name, self.collections)

    # Check date_start and date_end are in the correct format.
    check_start_end_date(date_start=date_start, date_end=date_end)
    check_area_polygon(
        area_polygon=area_polygon, connector_type=self.connector_type
    )
    check_bbox(bbox=bbox, connector_type=self.connector_type)

    if data_connector_spec is None:
        data_connector_spec_list = [
            X
            for X in self.collections_details
            if X["collection_name"] == data_collection_name
        ]
        if len(data_connector_spec_list) == 0:
            error_msg = (
                f"Unable to find collection details for '{data_collection_name}'"
            )
            logger.error(error_msg)
            # TerrakitValueError (was a bare ValueError) to match the
            # documented error contract of this connector.
            raise TerrakitValueError(error_msg)
        data_connector_spec = data_connector_spec_list[0]

    # NOTE(security): the spec's "data_collection" field is evaluated as
    # Python. Specs ship with the package; never evaluate untrusted specs.
    data_collection = eval(data_connector_spec["data_collection"])

    self.sh_config.sh_base_url = data_collection.service_url
    logger.info(self.sh_config.sh_base_url)
    dataset_catalog = SentinelHubCatalog(config=self.sh_config)

    if "filter" in data_connector_spec["search"]:
        filter_string = data_connector_spec["search"]["filter"]
        # Substitute request inputs (and the caller-supplied maxcc) into the
        # filter template via plain string replacement.
        for X in data_connector_spec["request_input_data"]:
            if X == "maxcc":
                filter_string = filter_string.replace(X, str(maxcc))
            else:
                filter_string = filter_string.replace(
                    X, str(data_connector_spec["request_input_data"][X])
                )
    else:
        filter_string = ""

    if "fields" in data_connector_spec["search"]:
        # NOTE(security): also evaluated from the bundled spec.
        fields_dict = eval(data_connector_spec["search"]["fields"])
    else:
        fields_dict = {"include": ["id", "properties.datetime"], "exclude": []}

    time_interval = date_start, date_end

    if bbox is not None:
        aoi_bbox = BBox(bbox=bbox, crs=CRS.WGS84)

        search_iterator = dataset_catalog.search(
            data_collection,
            bbox=aoi_bbox,
            time=time_interval,
            filter=filter_string,
            fields=fields_dict,
        )
    elif area_polygon is not None:
        search_iterator = dataset_catalog.search(
            data_collection,
            intersects=area_polygon,
            time=time_interval,
            filter=filter_string,
            fields=fields_dict,
        )
    else:
        error_msg = f"Error: Issue finding data from {self.connector_type}. Please specify at least one of 'bbox' and 'area_polygon'"
        logger.error(error_msg)
        raise TerrakitValueError(error_msg)

    try:
        results = list(search_iterator)
    except InvalidClientError as e:
        error_msg = (
            f"Error: Issue authenticating. Check credentials are up to date.{e}"
        )
        logger.error(error_msg)
        return None, None

    # First 10 chars of the ISO datetime are the YYYY-MM-DD date.
    unique_dates = sorted({X["properties"]["datetime"][0:10] for X in results})
    return unique_dates, results

get_data

Fetches data from SentinelHub for the specified collection, date range, area, and bands.

Parameters:

Name Type Description Default
data_collection_name str

Name of the data collection to fetch data from.

required
date_start str

Start date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.

required
date_end str

End date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.

required
area_polygon list

Polygon defining the area of interest. Defaults to None.

None
bbox list

Bounding box defining the area of interest. Defaults to None.

None
bands list

List of bands to retrieve. Defaults to all bands.

[]
maxcc int

Maximum cloud cover threshold (0-100). Defaults to 100.

100
data_connector_spec dict

Data connector specification. Defaults to None.

None
save_file str

Path to save the output file. Defaults to None.

None
working_dir str

Working directory for temporary files. Defaults to '.'.

'.'

Returns:

Name Type Description
xarray Union[DataArray, None]

An xarray DataArray containing the fetched data with dimensions (time, band, y, x).

Raises:

Type Description
TerrakitValidationError

If a validation error occurs.

TerrakitValueError

If a value error occurs.

Source code in terrakit/download/data_connectors/sentinelhub.py
def get_data(
    self,
    data_collection_name,
    date_start,
    date_end,
    area_polygon=None,
    bbox=None,
    bands=None,
    maxcc=100,
    data_connector_spec=None,
    save_file=None,
    working_dir=".",
) -> Union[xr.DataArray, None]:
    """
    Fetches data from SentinelHub for the specified collection, date range, area, and bands.

    Parameters:
        data_collection_name (str): Name of the data collection to fetch data from.
        date_start (str): Start date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
        date_end (str): End date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
        area_polygon (list, optional): Polygon defining the area of interest. Defaults to None.
            NOTE(review): currently not forwarded to the search or download — only bbox is used; confirm intent.
        bbox (list, optional): Bounding box defining the area of interest. Defaults to None.
        bands (list, optional): List of bands to retrieve. Defaults to None (all bands).
        maxcc (int, optional): Maximum cloud cover threshold (0-100). Defaults to 100.
        data_connector_spec (dict, optional): Data connector specification. Defaults to None.
        save_file (str, optional): Path to save the output file. Defaults to None.
        working_dir (str, optional): Working directory for temporary files. Defaults to '.'.

    Returns:
        xr.DataArray: The fetched data with dimensions (time, band, y, x),
            or None if nothing was found or authentication failed.

    Raises:
        TerrakitValidationError: If credentials are missing.
        TerrakitValueError: If the collection name is invalid or its details cannot be found.
    """
    # Either credential missing is an error. (The previous check used ``and``
    # and therefore only fired when *both* variables were absent.)
    if "SH_CLIENT_ID" not in os.environ or "SH_CLIENT_SECRET" not in os.environ:
        error_msg = "Error: Missing credentials 'SH_CLIENT_ID' and 'SH_CLIENT_SECRET'. Please update .env with correct credentials."
        logger.warning(error_msg)
        raise TerrakitValidationError(error_msg)

    bands = [] if bands is None else bands

    # Check data_collection_name exists in self.collections.
    if data_collection_name not in self.collections:
        error_msg = f"Invalid collection '{data_collection_name}'. Please choose from one of the following collection {self.collections}"
        logger.warning(error_msg)
        raise TerrakitValueError(error_msg)

    logger.info(bands)
    # Honor a caller-supplied spec (it was previously ignored); otherwise look
    # it up, guarding the previously unchecked [0] indexing.
    if data_connector_spec is not None:
        collection_details = data_connector_spec
    else:
        matches = [
            X
            for X in self.collections_details
            if X["collection_name"] == data_collection_name
        ]
        if not matches:
            error_msg = (
                f"Unable to find collection details for '{data_collection_name}'"
            )
            logger.error(error_msg)
            raise TerrakitValueError(error_msg)
        collection_details = matches[0]

    unique_dates, res = self.find_data(
        data_collection_name,
        date_start,
        date_end,
        bbox=bbox,
        maxcc=maxcc,
        data_connector_spec=collection_details,
    )

    # find_data returns (None, None) when authentication failed.
    if unique_dates is None or res is None:
        logger.warning("Warning: Unique dates and find_data results are None")
        return None

    if not unique_dates:
        logger.warning(
            f"No data found for the specified date range {date_start}:{date_end}. Unique dates: {unique_dates}"
        )
        return None

    da_list = []
    logger.info(f"The following unique dates were found: {unique_dates}")
    sh_data_dir = f"{working_dir}/sh_data"
    for udate in unique_dates:
        # Per-date output name, e.g. out.tif -> out_2024-01-01.tif.
        usave_file = (
            save_file.replace(".tif", f"_{udate}.tif")
            if save_file is not None
            else None
        )

        da: xr.DataArray = sh_get_data(
            self.sh_config,
            collection_details,
            bbox,
            udate,
            bands,
            usave_file,
            sh_data_dir=sh_data_dir,
        )

        da_list.append(da)

    logger.info("Concatenating data...")
    da = xr.concat(da_list, dim="time")

    # Save combined array to file.
    # NOTE(review): presumably the helper is a no-op for save_file=None — confirm.
    save_data_array_to_file(da, save_file)

    # Clean up the temporary download directory (use the module logger, not
    # the root logging module, for consistency).
    logger.info(f"Removing dir {sh_data_dir}")
    shutil.rmtree(sh_data_dir, ignore_errors=True)

    return da