Skip to content

Documentation for Helper Modules


Documentation for the terrakit.general_utils helper modules.

terrakit.general_utils.labels_downloader

rapid_mapping_event_lookup(event_id) -> dict

Event look up for a given event from Copernicus Rapid Mapping Service.

Parameters:

Name Type Description Default
event_id str

event id is a three digit code unique to each event. Provide either as "EMSR000" or "000".

required

Returns:

Type Description
dict

dict[str, Any]: JSON response containing the full details available for a given event.

Source code in terrakit/general_utils/labels_downloader.py
def rapid_mapping_event_lookup(event_id) -> dict:
    """
    Fetch the public activation record of an event from the Copernicus Rapid Mapping Service.

    Parameters:
        event_id (str): three digit code unique to each event. Accepted either as "EMSR000" or "000".

    Returns:
        dict: decoded JSON body returned by the public-activations endpoint for the event.
    """
    # str.strip() treats "EMSR" as a set of characters to trim from both ends,
    # which leaves the numeric part of the id untouched.
    numeric_code = event_id.upper().strip("EMSR")
    response = get(
        f"{COPERNICUS_URL}/dashboard-api/public-activations/?code=EMSR{numeric_code}"
    )
    # Surface HTTP error statuses as exceptions before decoding the body.
    response.raise_for_status()
    payload: dict = response.json()
    return payload

rapid_mapping_acquisition_time_lookup(event_id, monitoring_number) -> str

Look up acquisition time for a given event ID from Copernicus Rapid Mapping Service.

Parameters:

Name Type Description Default
event_id str

event id is a three digit code unique to each event. Provide either as "EMSR000" or "000".

required
monitoring_number str

monitoring number given by a two digit number. Provide either as "MONIT00" or "monit00" or "00".

required

Returns:

Name Type Description
str str

acquisition time with format '%Y-%m-%dT%H:%M:%S'

Source code in terrakit/general_utils/labels_downloader.py
def rapid_mapping_acquisition_time_lookup(event_id, monitoring_number) -> str:
    """
    Look up acquisition time for a given event ID from Copernicus Rapid Mapping Service.

    Only the products of the first AOI of the first result returned by
    `rapid_mapping_event_lookup` are searched. If several products share the
    requested monitoring number, the acquisition time of the last one is returned.

    Parameters:
        event_id (str): event id is a three digit code unique to each event. Provide either as "EMSR000" or "000".
        monitoring_number (str): monitoring number given by a two digit number. Provide either as "MONIT00" or "monit00" or "00".

    Returns:
        str: acquisition time with format '%Y-%m-%dT%H:%M:%S'

    Raises:
        TerrakitBaseException: if no product of the event has the requested monitoring number.
    """
    # str.strip() trims a *set* of characters from both ends; the numeric part is untouched.
    event_id = event_id.upper().strip("EMSR")
    monitoring_number = monitoring_number.upper().strip("MONIT")
    # Convert once instead of on every loop iteration.
    wanted_number = int(monitoring_number)

    resp_json = rapid_mapping_event_lookup(event_id)
    products = resp_json["results"][0]["aois"][0]["products"]

    acquisition_time = None
    for product in products:
        if product["monitoringNumber"] == wanted_number:
            acquisition_time = product["images"][0]["acquisitionTime"]

    # Previously an unmatched monitoring number fell through to the return
    # statement and failed with an UnboundLocalError.
    if acquisition_time is None:
        raise TerrakitBaseException(
            f"No product with monitoring number {monitoring_number} found for EMSR{event_id}."
        )
    return acquisition_time

rapid_mapping_event_date_time_lookup(event_id) -> str

Look up event date and time for a given event ID from Copernicus Rapid Mapping Service.

Parameters:

Name Type Description Default
event_id str

event id is a three digit code unique to each event. Provide either as "EMSR000" or "000".

required

Returns:

Name Type Description
str str

event time with format '%Y-%m-%dT%H:%M:%S'

Source code in terrakit/general_utils/labels_downloader.py
def rapid_mapping_event_date_time_lookup(event_id) -> str:
    """
    Return the event date and time recorded by the Copernicus Rapid Mapping Service.

    Parameters:
        event_id (str): three digit code unique to each event. Accepted either as "EMSR000" or "000".

    Returns:
        str: the "eventTime" field of the first result, documented with format '%Y-%m-%dT%H:%M:%S'
    """
    # str.strip() trims the characters E, M, S, R from both ends, leaving the digits.
    activation_code = event_id.upper().strip("EMSR")
    response = get(
        f"{COPERNICUS_URL}/dashboard-api/public-activations/?code=EMSR{activation_code}"
    )
    # Surface HTTP error statuses as exceptions before decoding the body.
    response.raise_for_status()
    first_result = response.json()["results"][0]
    event_time: str = first_result["eventTime"]
    return event_time

rapid_mapping_geojson_downloader(event_id, aoi, monitoring_number, version, dest) -> str

Download GeoJSON labels from Copernicus Rapid Mapping Service.

Parameters:

Name Type Description Default
event_id str

event id is a three digit code unique to each event. Provide either as "EMSR000" or "emsr000" or "000".

required
aoi str

The area of interest is a two digit code for the aoi of the given event. Provide either as "AOI00" or "aoi00" or "00".

required
monitoring_number str

The monitoring number for the event. Provide either as "MONIT00" or "monit00" or "00".

required
version str

The event version number. Provide either as "V1" or "v1" or "1".

required
dest str

The destination directory to save the downloaded GeoJSON files.

required

Returns:

Name Type Description
str str

downloaded GeoJSON path name

Example
rapid_mapping_geojson_downloader(
    event_id="748",
    aoi="01",
    monitoring_number="05",
    version="v1",
    dest=LABELS_FOLDER,
)
Source code in terrakit/general_utils/labels_downloader.py
def rapid_mapping_geojson_downloader(
    event_id, aoi, monitoring_number, version, dest
) -> str:
    """
    Download GeoJSON labels from Copernicus Rapid Mapping Service.

    The delivery zip for the requested event / AOI / monitoring number / version
    is downloaded into `dest`, the `observedEventA` GeoJSON is extracted from it
    and the extracted file is renamed so that its name ends with the acquisition
    date. When a file with that final name already exists in `dest`, the
    download is skipped and the existing path is returned.

    Parameters:
        event_id (str): event id is a three digit code unique to each event. Provide either as "EMSR000" or "emsr000" or "000".
        aoi (str): The area of interest is a two digit code for the aoi of the given event. Provide either as "AOI00" or "aoi00" or "00".
        monitoring_number (str): The monitoring number for the event. Provide either as "MONIT00" or "monit00" or "00".
        version (str): The event version number. Provide either as "V1" or "v1" or "1".
        dest (str): The destination directory to save the downloaded GeoJSON files.

    Returns:
        str: downloaded GeoJSON path name

    Raises:
        TerrakitBaseException: if `dest` cannot be created, the zip cannot be
            written or extracted, or the extracted GeoJSON cannot be renamed.
            Errors raised by the HTTP request itself are not wrapped.

    Example:
        ```python
        rapid_mapping_geojson_downloader(
            event_id="748",
            aoi="01",
            monitoring_number="05",
            version="v1",
            dest=LABELS_FOLDER,
        )
        ```
    """
    dest = Path(dest).absolute()
    # str.strip() trims a *set* of characters from both ends; the numeric parts are untouched.
    event_id = event_id.upper().strip("EMSR")
    aoi = aoi.upper().strip("AOI")
    monitoring_number = monitoring_number.upper().strip("MONIT")
    version = version.upper().strip("V")
    zip_id = f"EMSR{event_id}_AOI{aoi}_DEL_MONIT{monitoring_number}_v{version}.zip"
    zip_file = f"{dest}/{zip_id}"
    geojson_file = f"EMSR{event_id}_AOI{aoi}_DEL_MONIT{monitoring_number}_observedEventA_v{version}.json"

    # The acquisition date is part of the final file name, so it is needed
    # before checking whether the labels already exist.
    acquisition_time = rapid_mapping_acquisition_time_lookup(
        event_id, monitoring_number
    )
    acquisition_date = acquisition_time.split("T")[0]

    # Final label file name includes the acquisition date.
    geojson_file_with_date = geojson_file.replace(".json", f"_{acquisition_date}.json")
    if Path(f"{dest}/{geojson_file_with_date}").is_file():
        print(
            f".\n..\n...\n>>> Skipping download.\n\t>>> File already exists: {dest}/{geojson_file_with_date} already exists."
        )
    else:
        print(
            f".\n..\n...\n>>> Downloading labels from Copernicus Emergency Management Service for: \n\t>>> EMSR{event_id} <<<\n\t>>> AOI{aoi} <<<\n\t>>> MONIT{monitoring_number} <<<\n\t>>> observedEventA <<<\n\t>>> v{version} <<<"
        )
        # Create directory to download results to
        try:
            dest.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            raise TerrakitBaseException(
                f"An issue occurred created {dest}. Please check this is a valid dir: {e}"
            ) from e

        # Download zip
        url = f"{COPERNICUS_URL}/EMSR{event_id}/AOI{aoi}/DEL_MONIT{monitoring_number}/{zip_id}"
        print(f".\n..\n...\n>>> Requesting event data from:\n\t>>> {url} ... <<<")
        resp = get(url)
        resp.raise_for_status()

        # Write the downloaded zip to disk
        try:
            with open(zip_file, "wb") as f:
                f.write(resp.content)
        except Exception as e:
            raise TerrakitBaseException(
                f"An issue occurred while writting the contents to {zip_file}: {e}"
            ) from e

        # Extract the label GeoJSON from the zip
        print(f".\n..\n...\n>>> Extracting event geojson to:\n\t>>> {dest} ... <<<")
        try:
            with ZipFile(zip_file, "r") as z_file:
                geojson_found = geojson_file in z_file.namelist()
                if geojson_found:
                    z_file.extract(geojson_file, dest)
                else:
                    print(
                        f"{geojson_file} not found in zip. Zip contents includes: {z_file.filelist}"
                    )
            # Delete the archive only after it has been closed: removing a file
            # that is still open fails on Windows. The zip is kept when the
            # GeoJSON was not found, as before.
            if geojson_found:
                os.remove(zip_file)
        except Exception as e:
            raise TerrakitBaseException(
                f"An issue occurred while extracting {geojson_file} from {zip_file}: {e}"
            ) from e

        # Rename the extracted GeoJSON so its name includes the acquisition date
        try:
            os.rename(f"{dest}/{geojson_file}", f"{dest}/{geojson_file_with_date}")
            print(
                f".\n..\n...\n>>> Label geojson successfully saved:\n\t>>> acquisition date: {acquisition_date} <<<\n\t>>> {dest}/{geojson_file_with_date} <<<"
            )
        except FileNotFoundError as e:
            raise TerrakitBaseException(
                f"Error: {dest}/{geojson_file} not found."
            ) from e
        except PermissionError as e:
            raise TerrakitBaseException(
                f"Error: Check permission to rename {dest}/{geojson_file}."
            ) from e
        except OSError as e:
            raise TerrakitBaseException(
                f"An error occurred append date to {dest}/{geojson_file}: {e}"
            ) from e
        print(".\n..\n...\n>>> Downloaded completed successfully <<<")
    return f"{dest}/{geojson_file_with_date}"

hugging_face_file_downloader(repo_id: str, filename: str, revision: str = 'main', subfolder: str | None = None, dest: str = '.')

Downloads a label file from Hugging Face Hub.

Parameters:

Name Type Description Default
repo_id str

The Hugging Face Hub repository ID.

required
filename str

The name of the file to download.

required
revision str

The revision or commit to download. Defaults to "main".

'main'
subfolder str

The subfolder within the repository to download from. Defaults to None.

None
dest str

The destination directory to save the downloaded file. Defaults to the current directory (.).

'.'

Returns:

Name Type Description
str

The path to the downloaded file.

Example
hugging_face_file_downloader(
    repo_id="ibm-nasa-geospatial/hls_burn_scars",
    filename="subsetted_512x512_HLS.S30.T10SGD.2021306.v1.4.mask.tif",
    revision="e48662b31288f1d5f1fd5cf5ebb0e454092a19ce",
    subfolder="training",
    dest="./docs/examples/test_wildfire",
)
Source code in terrakit/general_utils/labels_downloader.py
def hugging_face_file_downloader(
    repo_id: str,
    filename: str,
    revision: str = "main",
    subfolder: str | None = None,
    dest: str = ".",
) -> str:
    """
    Downloads a label file from Hugging Face Hub.

    The file is first downloaded into a `tmp_hf_download` directory relative to
    the current working directory and then moved into `dest`.

    Parameters:
        repo_id (str): The Hugging Face Hub repository ID.
        filename (str): The name of the file to download.
        revision (str, optional): The revision or commit to download. Defaults to "main".
        subfolder (str, optional): The subfolder within the repository to download from. Defaults to None.
        dest (str, optional): The destination directory to save the downloaded file. Defaults to the current directory (.).

    Returns:
        str: The path to the downloaded file.

    Raises:
        TerrakitBaseException: if `dest` cannot be created or the downloaded
            file cannot be moved into it.

    Example:
        ```python
        hugging_face_file_downloader(
            repo_id="ibm-nasa-geospatial/hls_burn_scars",
            filename="subsetted_512x512_HLS.S30.T10SGD.2021306.v1.4.mask.tif",
            revision="e48662b31288f1d5f1fd5cf5ebb0e454092a19ce",
            subfolder="training",
            dest="./docs/examples/test_wildfire",
        )
        ```
    """
    # Create directory to download results to
    try:
        dest = Path(dest)  # type: ignore[assignment]
        dest.mkdir(parents=True, exist_ok=True)  # type: ignore[attr-defined]
    except Exception as e:
        raise TerrakitBaseException(
            f"An issue occurred created {dest}. Please check this is a valid dir: {e}"
        ) from e

    tmp_download_dir = "tmp_hf_download"
    hf_hub_download(
        repo_id=repo_id,
        repo_type="dataset",
        subfolder=subfolder,
        filename=filename,
        revision=revision,
        # Derived from tmp_download_dir so the two locations cannot drift apart.
        local_dir=f"./{tmp_download_dir}",
    )

    # The file is expected under the subfolder (when given) inside the
    # temporary download directory.
    if subfolder:
        source_path = f"./{tmp_download_dir}/{subfolder}/{filename}"
    else:
        source_path = f"{tmp_download_dir}/{filename}"
    target_path = f"{dest}/{filename}"

    try:
        os.rename(source_path, target_path)
        print(".\n..\n...\n>>> Label successfully saved<<<")
    except FileNotFoundError as e:
        # Report the path that was actually looked up, including any subfolder.
        raise TerrakitBaseException(f"Error: {source_path} not found.") from e
    except PermissionError as e:
        raise TerrakitBaseException(
            f"Error: Check permission to rename {dest}/{filename}."
        ) from e
    except OSError as e:
        raise TerrakitBaseException(
            f"An error occurred moving {source_path} to {target_path}: {e}"
        ) from e
    print(".\n..\n...\n>>> Downloaded completed successfully <<<")
    # The docstring has always promised the path; previously nothing was returned.
    return target_path

terrakit.general_utils.statistics

compute_stats(dataset)

Compute descriptive statistics for a given dataset.

Parameters:

Name Type Description Default
dataset ndarray

The dataset for which to compute statistics.

required

Returns:

Name Type Description
tuple

A tuple containing mean, median, minimum, maximum, standard deviation, and count of the dataset.

Source code in terrakit/general_utils/statistics.py
def compute_stats(dataset):
    """
    Compute and print descriptive statistics for a dataset.

    Parameters:
        dataset (numpy.ndarray): The values to summarise.

    Returns:
        tuple: (mean, median, minimum, maximum, standard deviation, count), where
            count is `dataset.size`.
    """
    summary = (
        np.mean(dataset),
        np.median(dataset),
        np.min(dataset),
        np.max(dataset),
        np.std(dataset),
        dataset.size,
    )
    captions = (
        "Mean pixel value",
        "Median pixel value",
        "Minimum pixel value",
        "Maximum pixel value",
        "Standard deviation",
    )
    # zip() stops after the five captions; the count is reported separately
    # because its line also carries the trailing separator.
    for caption, value in zip(captions, summary):
        print(f"{caption}: {value}")
    print(f"Number of masked pixels: {summary[-1]}\n--------")
    return summary

compute_stats_for_masked_pixels(image, mask)

Compute descriptive statistics for masked pixels in the given image.

Parameters:

Name Type Description Default
image ndarray

The image data.

required
mask ndarray

The mask to filter the image data.

required

Returns:

Name Type Description
tuple

A tuple containing mean, median, minimum, maximum, standard deviation, and count of the masked pixels.

Source code in terrakit/general_utils/statistics.py
def compute_stats_for_masked_pixels(image, mask):
    """
    Compute descriptive statistics over the pixels selected by a mask.

    Parameters:
        image (numpy.ndarray): The image data.
        mask (numpy.ndarray): The mask; pixels where the mask is greater than 0 are kept.

    Returns:
        tuple: The result of `compute_stats` for the selected pixels.
    """
    # Boolean indexing keeps only the pixels whose mask value is positive.
    return compute_stats(image[mask > 0])

load_verified_stats()

Load precomputed statistics for verified data calculated from target_tif = "sentinel_aws_sentinel-2-l2a_2024-08-30_imputed_20" generated using EMSR748

Returns:

Name Type Description
tuple

A tuple containing verified label statistics, verified data statistics, and verified masked statistics.

Source code in terrakit/general_utils/statistics.py
def load_verified_stats():
    """
    Return precomputed reference statistics calculated from target_tif = "sentinel_aws_sentinel-2-l2a_2024-08-30_imputed_20" generated using EMSR748.

    Returns:
        tuple: (verified label statistics, verified data statistics, verified masked statistics).
            Each entry is a 6-tuple of five numpy.float64 values followed by an integer count.
    """

    def _pack(count, *floats):
        # Wrap every float in np.float64 and append the integer count last.
        return tuple(np.float64(value) for value in floats) + (count,)

    label_stats = _pack(65536, 0.532928466796875, 1.0, 0.0, 1.0, 0.49891453784632)
    data_stats = _pack(
        65536, 6038.482086181641, 6510.0, 507.0, 10984.0, 1818.4264044731774
    )
    mask_stats = _pack(
        34926, 6435.794422493272, 6784.0, 664.0, 9424.0, 1584.2594606115797
    )
    return label_stats, data_stats, mask_stats

terrakit.general_utils.defaults

get_default_class_args_and_values(cls_name: type) -> dict

Return a dictionary of class arguments and default values.

Parameters:

Name Type Description Default
cls_name type

The class for which to retrieve default arguments.

required

Returns:

Name Type Description
dict dict

A dictionary containing class arguments and their default values.

Source code in terrakit/general_utils/defaults.py
def get_default_class_args_and_values(cls_name: type) -> dict:
    """
    Map each `__init__` argument of a class to its default value.

    Parameters:
        cls_name (type): The class whose `__init__` signature is inspected.

    Returns:
        dict: Argument names (excluding `self`) mapped to their default values;
            arguments without a default are mapped to None.
    """
    init_params = inspect.signature(cls_name.__init__).parameters  # type: ignore[misc]
    return {
        arg_name: (
            None if spec.default is inspect.Parameter.empty else spec.default
        )
        for arg_name, spec in init_params.items()
        if arg_name != "self"
    }

get_pipeline_defaults() -> dict

Return a dictionary of arguments and default values for all pipeline steps.

Returns:

dict: A dictionary containing class arguments and their default values for pipeline steps.

Example
from terrakit.general_utils.defaults import get_pipeline_defaults

options = get_pipeline_defaults()
Source code in terrakit/general_utils/defaults.py
def get_pipeline_defaults() -> dict:
    """
    Collect the default arguments of every pipeline step.

    Returns:
        dict: Keys "labels", "download" and "chip", each mapped to the dictionary
            of class arguments and default values for that step.

    Example:
        ```python
        from terrakit.general_utils.defaults import get_pipeline_defaults

        options = get_pipeline_defaults()
        ```
    """
    return {
        "labels": get_default_class_args_and_values(LabelsCls),
        "download": get_default_class_args_and_values(DownloadCls),
        "chip": get_default_class_args_and_values(ChipAndLabelCls),
    }

update_pipeline_args(pipeline_options: dict) -> dict

Update default values for any pipeline steps specified in pipeline_options.

Parameters:

Name Type Description Default
pipeline_options dict

Dictionary of all pipeline options.

required

Returns:

Name Type Description
dict dict

Dictionary of class arguments and either default values or 'pipeline_options' values.

Example
from terrakit.general_utils.defaults import update_pipeline_args

my_options = {
    "chip": {
        "sample_dim": 124,
    }
}
onboarding_options = update_pipeline_args(my_options)
Source code in terrakit/general_utils/defaults.py
def update_pipeline_args(pipeline_options: dict) -> dict:
    """
    Overlay user-supplied pipeline options on top of the pipeline defaults.

    Steps and arguments in `pipeline_options` that do not exist in the defaults
    are ignored.

    Parameters:
        pipeline_options (dict): Dictionary of all pipeline options, keyed by step name.

    Returns:
        dict: Dictionary of class arguments and either default values or 'pipeline_options' values.

    Example:
        ```python
        from terrakit.general_utils.defaults import update_pipeline_args

        my_options = {
            "chip": {
                "sample_dim": 124,
            }
        }
        onboarding_options = update_pipeline_args(my_options)
        ```
    """
    merged_options = get_pipeline_defaults()
    for step_name in pipeline_options:
        # Unknown steps are skipped rather than added.
        if step_name not in merged_options:
            continue
        step_defaults = merged_options[step_name]
        overrides = pipeline_options[step_name]
        for arg_name in overrides:
            # Only arguments the step already defines can be overridden.
            if arg_name in step_defaults:
                step_defaults[arg_name] = overrides[arg_name]
    return merged_options

terrakit.general_utils.exceptions

TerrakitBaseException

Bases: Exception

Base exception for all custom exceptions in the project.

Source code in terrakit/general_utils/exceptions.py
class TerrakitBaseException(Exception):
    """Root of the project's custom exception hierarchy.

    Carries a human-readable message plus optional extra details that are
    appended to the string form of the exception when present.
    """

    def __init__(self, message: str, details: Union[None, str, Dict[Any, Any]] = None):
        super().__init__(message)
        self.message = message
        # Falsy details (None, "", {}) are normalised to an empty dict.
        self.details = details if details else {}

    def __str__(self) -> str:
        if not self.details:
            return self.message
        return f"{self.message} - Additional details: {self.details}"

TerrakitValidationError

Bases: TerrakitBaseException

Raised when there is a validation error.

Source code in terrakit/general_utils/exceptions.py
class TerrakitValidationError(TerrakitBaseException):
    """Raised when there is a validation error."""

TerrakitValueError

Bases: TerrakitBaseException

Raised when there is invalid input.

Source code in terrakit/general_utils/exceptions.py
class TerrakitValueError(TerrakitBaseException):
    """Raised when the supplied input is invalid."""