Skip to content

Add a New Data Connector

To add a new data connector, use connector_template.py as a starting point. The new connector should implement the list_collections, find_data and get_data methods and extend the Connector class from the terrakit.download.connector module. Finally, update terrakit.py to enable the new connector to be selected.

To add tests for the new connector, make use of test_connector_template.py.

Make sure to also update the documentation. Each data connector has a separate markdown file making it easy to add new docs.

Data Connector Template class Documentation

ConnectorTemplate

Bases: Connector

Attributes:

Name Type Description
connector_type str

Name of connector

collections list

A list of available collections.

collections_details list

Detailed information about the collections.

Source code in terrakit/download/data_connectors/connector_template.py
class ConnectorTemplate(Connector):
    """
    Skeleton data connector to copy when adding a new connector.

    Every method below returns an empty placeholder value; replace the bodies
    with calls to the new data source.

    Attributes:
        connector_type (str): Name of connector
        collections (list): A list of available collections.
        collections_details (list): Detailed information about the collections.
    """

    def __init__(self):
        """
        Initialize the connector with its collections.

        Both the collection list and the detailed collection records are
        loaded with load_and_list_collections for this connector_type.
        """
        # Replace the placeholder with the name of the new connector.
        self.connector_type: str = "<new_connector>"
        self.collections: list[Any] = load_and_list_collections(
            connector_type=self.connector_type
        )
        self.collections_details: list[Any] = load_and_list_collections(
            as_json=True, connector_type=self.connector_type
        )

    def list_collections(self) -> list[Any]:
        """
        Lists the available collections.

        Returns:
            list: The collections loaded when the connector was initialized.
        """
        logger.info("Listing available collections")
        return self.collections

    def find_data(
        self,
        data_collection_name: str,
        date_start: str,
        date_end: str,
        area_polygon=None,
        bbox=None,
        bands=[],
        maxcc=100,
        data_connector_spec=None,
    ) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
        """
        Retrieves unique dates and corresponding data results from the specified data collection.

        This template implementation performs no search and returns placeholders.

        Args:
            data_collection_name (str): The name of the data collection to search.
            date_start (str): The start date for the time interval in 'YYYY-MM-DD' format.
            date_end (str): The end date for the time interval in 'YYYY-MM-DD' format.
            area_polygon (Polygon, optional): A polygon defining the area of interest.
            bbox (tuple, optional): A bounding box defining the area of interest in the format (minx, miny, maxx, maxy).
            bands (list, optional): A list of bands to retrieve. Defaults to []. The default
                list object is shared between calls, so do not modify it in place.
            maxcc (int, optional): The maximum cloud cover percentage for the data. Default is 100 (no cloud cover filter).
            data_connector_spec (dict, optional): A dictionary containing the data connector specification.

        Returns:
            tuple: A tuple containing a list of unique dates and a list of data results.
                The template returns an empty date list and a single empty result dictionary.
        """
        unique_dates: list[str] = []
        results: list[dict[str, Any]] = [{}]
        return unique_dates, results

    def get_data(
        self,
        data_collection_name: str,
        date_start: str,
        date_end: str,
        area_polygon=None,
        bbox=None,
        bands=[],
        maxcc=100,
        data_connector_spec=None,
        save_file=None,
        working_dir=".",
    ):
        """
        Fetches data for the specified collection, date range, area, and bands.

        This template implementation fetches nothing and returns an empty DataArray.

        Args:
            data_collection_name (str): Name of the data collection to fetch data from.
            date_start (str): Start date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
            date_end (str): End date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
            area_polygon (list, optional): Polygon defining the area of interest. Defaults to None.
            bbox (list, optional): Bounding box defining the area of interest. Defaults to None.
            bands (list, optional): List of bands to retrieve. Defaults to []. The default
                list object is shared between calls, so do not modify it in place.
            maxcc (int, optional): Maximum cloud cover threshold (0-100). Defaults to 100.
            data_connector_spec (dict, optional): Data connector specification. Defaults to None.
            save_file (str, optional): Path to save the output file. Defaults to None.
            working_dir (str, optional): Working directory for temporary files. Defaults to '.'.

        Returns:
            xr.DataArray: The fetched data. A full implementation is expected to return
                dimensions (time, band, y, x); the template returns an empty DataArray.
        """
        return xr.DataArray()

list_collections

Lists the available collections.

Returns:

Name Type Description
list list[Any]

A list of collection names.

Source code in terrakit/download/data_connectors/connector_template.py
def list_collections(self) -> list[Any]:
    """
    Return the collections held by this connector.

    Returns:
        list: The list stored in ``self.collections``.
    """
    logger.info("Listing available collections")
    available = self.collections
    return available

find_data

Retrieves unique dates and corresponding data results from the specified data collection.

Parameters:

Name Type Description Default
data_collection_name str

The name of the data collection to search.

required
date_start str

The start date for the time interval in 'YYYY-MM-DD' format.

required
date_end str

The end date for the time interval in 'YYYY-MM-DD' format.

required
area_polygon Polygon

A polygon defining the area of interest.

None
bbox tuple

A bounding box defining the area of interest in the format (minx, miny, maxx, maxy).

None
bands list

A list of bands to retrieve. Defaults to [].

[]
maxcc int

The maximum cloud cover percentage for the data. Default is 100 (no cloud cover filter).

100
data_connector_spec dict

A dictionary containing the data connector specification.

None

Returns:

Name Type Description
tuple Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]

A tuple containing a sorted list of unique dates and a list of data results.

Source code in terrakit/download/data_connectors/connector_template.py
def find_data(
    self,
    data_collection_name: str,
    date_start: str,
    date_end: str,
    area_polygon=None,
    bbox=None,
    bands=[],
    maxcc=100,
    data_connector_spec=None,
) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
    """
    Retrieves unique dates and corresponding data results from the specified data collection.

    This template implementation performs no search and returns placeholders.

    Args:
        data_collection_name (str): The name of the data collection to search.
        date_start (str): The start date for the time interval in 'YYYY-MM-DD' format.
        date_end (str): The end date for the time interval in 'YYYY-MM-DD' format.
        area_polygon (Polygon, optional): A polygon defining the area of interest.
        bbox (tuple, optional): A bounding box defining the area of interest in the format (minx, miny, maxx, maxy).
        bands (list, optional): A list of bands to retrieve. Defaults to []. The default
            list object is shared between calls, so do not modify it in place.
        maxcc (int, optional): The maximum cloud cover percentage for the data. Default is 100 (no cloud cover filter).
        data_connector_spec (dict, optional): A dictionary containing the data connector specification.

    Returns:
        tuple: A tuple containing a list of unique dates and a list of data results.
            The template returns an empty date list and a single empty result dictionary.
    """
    # Placeholders: a real connector fills these from its search results.
    unique_dates: list[str] = []
    results: list[dict[str, Any]] = [{}]
    return unique_dates, results

get_data

Fetches data for the specified collection, date range, area, and bands.

Parameters:

Name Type Description Default
data_collection_name str

Name of the data collection to fetch data from.

required
date_start str

Start date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.

required
date_end str

End date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.

required
area_polygon list

Polygon defining the area of interest. Defaults to None.

None
bbox list

Bounding box defining the area of interest. Defaults to None.

None
bands list

List of bands to retrieve. Defaults to all bands.

[]
maxcc int

Maximum cloud cover threshold (0-100). Defaults to 100.

100
data_connector_spec dict

Data connector specification. Defaults to None.

None
save_file str

Path to save the output file. Defaults to None.

None
working_dir str

Working directory for temporary files. Defaults to '.'.

'.'

Returns:

Name Type Description
xarray

An xarray DataArray containing the fetched data with dimensions (time, band, y, x).

Source code in terrakit/download/data_connectors/connector_template.py
def get_data(
    self,
    data_collection_name: str,
    date_start: str,
    date_end: str,
    area_polygon=None,
    bbox=None,
    bands=[],
    maxcc=100,
    data_connector_spec=None,
    save_file=None,
    working_dir=".",
):
    """
    Fetches data for the specified collection, date range, area, and bands.

    This template implementation fetches nothing and returns an empty DataArray.

    Args:
        data_collection_name (str): Name of the data collection to fetch data from.
        date_start (str): Start date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
        date_end (str): End date for the data retrieval (inclusive), in 'YYYY-MM-DD' format.
        area_polygon (list, optional): Polygon defining the area of interest. Defaults to None.
        bbox (list, optional): Bounding box defining the area of interest. Defaults to None.
        bands (list, optional): List of bands to retrieve. Defaults to []. The default
            list object is shared between calls, so do not modify it in place.
        maxcc (int, optional): Maximum cloud cover threshold (0-100). Defaults to 100.
        data_connector_spec (dict, optional): Data connector specification. Defaults to None.
        save_file (str, optional): Path to save the output file. Defaults to None.
        working_dir (str, optional): Working directory for temporary files. Defaults to '.'.

    Returns:
        xr.DataArray: The fetched data. A full implementation is expected to return
            dimensions (time, band, y, x); the template returns an empty DataArray.
    """
    return xr.DataArray()

Data Connector Abstract class Documentation

terrakit.download.connector

Connector

Bases: ABC

An abstract base class for all connectors. This class insists that any subclass must have a list_collections(), find_data() and get_data() method.

Methods:

Name Description
list_collections

Returns a list of available data collections.

find_data

Finds data within specified parameters and returns a list of unique dates and relevant metadata.

get_data

Retrieves data based on given parameters and saves to file.

Source code in terrakit/download/connector.py
class Connector(ABC):
    """
    Abstract interface shared by every data connector.

    A concrete subclass must implement list_collections(), find_data() and
    get_data(); the class cannot be instantiated until all three are overridden.

    Attributes:
        None

    Methods:
        list_collections: Returns a list of available data collections.
        find_data: Finds data within specified parameters and returns a list of unique dates and relevant metadata.
        get_data: Retrieves data based on given parameters and optionally saves it to file.
    """

    @abstractmethod
    def list_collections(self) -> list[Any]:
        """
        Report the data collections this connector offers.

        Returns:
            list[Any]: List of available data collection names.
        """

    @abstractmethod
    def find_data(
        self,
        data_collection_name: str,
        date_start: str,
        date_end: str,
        area_polygon=None,
        bbox=None,
        bands=[],
        maxcc=100,
        data_connector_spec=None,
    ) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
        """
        Search a collection for data matching the given parameters.

        Args:
            data_collection_name (str): The name of the data collection to search.
            date_start (str): The start date for the data retrieval.
            date_end (str): The end date for the data retrieval.
            area_polygon (Optional[Any]): Polygon defining the area of interest. Either specify area_polygon or bbox.
            bbox (Optional[Any]): Bounding box defining the area of interest. Either specify area_polygon or bbox.
            bands (list[str]): List of bands to retrieve.
            maxcc (int): Maximum cloud cover percentage.
            data_connector_spec (Optional[Any]): Additional specifications for the data connector.

        Returns:
            Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]: A tuple containing a list of data identifiers and a list of metadata dictionaries, or (None, None) if no data is found.
        """

    @abstractmethod
    def get_data(
        self,
        data_collection_name,
        date_start,
        date_end,
        area_polygon=None,
        bbox=None,
        bands=[],
        maxcc=100,
        data_connector_spec=None,
        save_file=None,
        working_dir=".",
    ) -> Union[xr.DataArray, None]:
        """
        Retrieve data matching the given parameters, optionally saving it to file.

        Args:
            data_collection_name (str): The name of the data collection to retrieve.
            date_start (str): The start date for data retrieval.
            date_end (str): The end date for data retrieval.
            area_polygon (Optional[Any]): Polygon defining the area of interest.
            bbox (Optional[Any]): Bounding box defining the area of interest.
            bands (list[str]): List of bands to retrieve.
            maxcc (int): Maximum cloud cover percentage.
            data_connector_spec (Optional[Any]): Additional specifications for the data connector.
            save_file (Optional[str]): Path to save the retrieved data file.
            working_dir (str): Working directory for saving the file.

        Returns:
            Union[xr.DataArray, None]: The retrieved xarray DataArray or None if no data is found.
        """

list_collections abstractmethod

Returns a list of available data collections.

Returns:

Type Description
list[Any]

list[Any]: List of available data collection names.

Source code in terrakit/download/connector.py
@abstractmethod
def list_collections(self) -> list[Any]:
    """
    Report the data collections this connector offers.

    Returns:
        list[Any]: List of available data collection names.
    """

find_data abstractmethod

Finds data within specified parameters and returns relevant metadata.

Parameters:

Name Type Description Default
data_collection_name str

The name of the data collection to search.

required
date_start str

The start date for the data retrieval.

required
date_end str

The end date for the data retrieval.

required
area_polygon Optional[Any]

Polygon defining the area of interest. Either specify area_polygon or bbox.

None
bbox Optional[Any]

Bounding box defining the area of interest. Either specify area_polygon or bbox.

None
bands list[str]

List of bands to retrieve.

[]
maxcc int

Maximum cloud cover percentage.

100
data_connector_spec Optional[Any]

Additional specifications for the data connector.

None

Returns:

Type Description
Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]

Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]: A tuple containing a list of data identifiers and a list of metadata dictionaries, or (None, None) if no data is found.

Source code in terrakit/download/connector.py
@abstractmethod
def find_data(
    self,
    data_collection_name: str,
    date_start: str,
    date_end: str,
    area_polygon=None,
    bbox=None,
    bands=[],
    maxcc=100,
    data_connector_spec=None,
) -> Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]:
    """
    Search a collection for data matching the given parameters.

    Args:
        data_collection_name (str): The name of the data collection to search.
        date_start (str): The start date for the data retrieval.
        date_end (str): The end date for the data retrieval.
        area_polygon (Optional[Any]): Polygon defining the area of interest. Either specify area_polygon or bbox.
        bbox (Optional[Any]): Bounding box defining the area of interest. Either specify area_polygon or bbox.
        bands (list[str]): List of bands to retrieve.
        maxcc (int): Maximum cloud cover percentage.
        data_connector_spec (Optional[Any]): Additional specifications for the data connector.

    Returns:
        Union[tuple[list[Any], list[dict[str, Any]]], tuple[None, None]]: A tuple containing a list of data identifiers and a list of metadata dictionaries, or (None, None) if no data is found.
    """

get_data abstractmethod

Retrieves data based on the given parameters and optionally saves it to file.

Parameters:

Name Type Description Default
data_collection_name str

The name of the data collection to retrieve.

required
date_start str

The start date for data retrieval.

required
date_end str

The end date for data retrieval.

required
area_polygon Optional[Any]

Polygon defining the area of interest.

None
bbox Optional[Any]

Bounding box defining the area of interest.

None
bands list[str]

List of bands to retrieve.

[]
maxcc int

Maximum cloud cover percentage.

100
data_connector_spec Optional[Any]

Additional specifications for the data connector.

None
save_file Optional[str]

Path to save the retrieved data file.

None
working_dir str

Working directory for saving the file.

'.'

Returns:

Type Description
Union[DataArray, None]

Union[xr.DataArray, None]: The retrieved xarray DataArray or None if no data is found.

Source code in terrakit/download/connector.py
@abstractmethod
def get_data(
    self,
    data_collection_name,
    date_start,
    date_end,
    area_polygon=None,
    bbox=None,
    bands=[],
    maxcc=100,
    data_connector_spec=None,
    save_file=None,
    working_dir=".",
) -> Union[xr.DataArray, None]:
    """
    Retrieve data matching the given parameters, optionally saving it to file.

    Args:
        data_collection_name (str): The name of the data collection to retrieve.
        date_start (str): The start date for data retrieval.
        date_end (str): The end date for data retrieval.
        area_polygon (Optional[Any]): Polygon defining the area of interest.
        bbox (Optional[Any]): Bounding box defining the area of interest.
        bands (list[str]): List of bands to retrieve.
        maxcc (int): Maximum cloud cover percentage.
        data_connector_spec (Optional[Any]): Additional specifications for the data connector.
        save_file (Optional[str]): Path to save the retrieved data file.
        working_dir (str): Working directory for saving the file.

    Returns:
        Union[xr.DataArray, None]: The retrieved xarray DataArray or None if no data is found.
    """