TerraKit: Easy geospatial data search and query
TerraKit Data Connectors can be used outside the TerraKit Pipeline. This notebook will guide you through using TerraKit Data Connectors to download data from different data collections.
import numpy as np
import xarray as xr
import pandas as pd
from pathlib import Path
from terrakit import DataConnector
from terrakit.download.transformations.impute_nans_xarray import impute_nans_xarray
from terrakit.download.transformations.scale_data_xarray import scale_data_xarray
from terrakit.download.geodata_utils import save_data_array_to_file
# To query using a data connector, we load API credentials from .env
import dotenv
dotenv.load_dotenv()
First, we create a DataConnector instance for the connector we wish to use. You can list the available collections for that connector.
Currently, you can select from:
- sentinelhub
- sentinel_aws
- nasa_earthdata
- IBMResearchSTAC
- TheWeatherCompany
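To get a quick overview of everything on offer, you can loop over these connector names and list each one's collections. A minimal sketch, assuming credentials for every connector are already configured in your environment:
# List the collections exposed by each available connector
# (connectors that require credentials will need them configured first)
for name in ["sentinelhub", "sentinel_aws", "nasa_earthdata",
             "IBMResearchSTAC", "TheWeatherCompany"]:
    dc = DataConnector(connector_type=name)
    print(name, dc.connector.list_collections())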
Sentinel-2-l2a from Sentinel AWS
data_connector = "sentinel_aws"
dc = DataConnector(connector_type=data_connector)
dc.connector.list_collections()
Now we can search for data. To do so we select the collection and specify our bounding box and time range.
The find_data() function searches for data and returns both a list of unique dates on which data is available and the raw results of the search. For a STAC catalogue, the results are the STAC entries; for other connectors, they are the connector-specific item details.
bbox = [34.671440, -0.090887, 34.706448, -0.087678]
collection_name = "sentinel-2-l2a"
bands = ["blue", "green", "red"]
unique_dates, results = dc.connector.find_data(
    data_collection_name=collection_name,
    date_start="2024-01-01",
    date_end="2024-01-31",
    bands=bands,
    bbox=bbox,
)
print(unique_dates)
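Beyond the dates, it is worth peeking at the raw results as well. A minimal sketch, assuming results is list-like (its item structure is connector-specific, as noted above):
# Inspect the raw search results; for a STAC catalogue these are STAC
# entries, for other connectors connector-specific item details
print(f"{len(results)} result(s) across {len(unique_dates)} unique date(s)")
print(results[0])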
Now, to query the data, we specify the bands we want returned, plus an optional save filename. The get_data() function queries the data source and returns an xarray object containing the data; if you pass save_file= as an argument, it also saves a GeoTIFF file.
If the requested bands are not found for the chosen collection, get_data() tries to match the alternative names stored in the internal collections catalogue; otherwise it fails and reports the available bands.
save_filestem = f"./tmp_download/{data_connector}_{collection_name}"
da = dc.connector.get_data(
    data_collection_name=collection_name,
    date_start="2024-01-01",
    date_end="2024-01-31",
    bbox=bbox,
    bands=bands,
    save_file=f"{save_filestem}.tif",
)
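To see the failure mode described above, you can request a band the collection does not provide. A hedged sketch (not_a_real_band is a made-up name, and the exception type is not specified here, so we catch broadly):
# An unknown band that matches no alt name should fail and report
# the bands the collection actually offers
try:
    dc.connector.get_data(
        data_collection_name=collection_name,
        date_start="2024-01-01",
        date_end="2024-01-31",
        bbox=bbox,
        bands=["not_a_real_band"],
    )
except Exception as err:
    print(err)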
# Apply a per-band scale factor (all 1.0 here, i.e. an identity scaling),
# impute NaNs, then save the imputed array
dai = scale_data_xarray(da, list(np.ones(len(bands))))
dai = impute_nans_xarray(dai)
save_data_array_to_file(dai, save_file=f"{save_filestem}.tif", imputed=True)
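As an optional sanity check, you can re-open the GeoTIFF that get_data() wrote. A sketch that assumes the rioxarray package is installed (it registers the "rasterio" engine with xarray):
# Round-trip the saved GeoTIFF (assumes rioxarray is installed)
check = xr.open_dataarray(f"{save_filestem}.tif", engine="rasterio")
print(check.dims, check.shape)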
Sentinel-2-l2a from Sentinel Hub
Before connecting to the Sentinel Hub data connector, ensure credentials have been added to your local environment. The easiest way to do this is to add them to your .env file. Log in to planet.com to generate an OAuth client ID and client secret, then add them to the .env file:
# .env
SH_CLIENT_ID=<your_token_here>
SH_CLIENT_SECRET=<your_token_here>
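Before creating the connector, it can help to confirm the credentials were actually loaded from .env. A quick sketch using the variable names above:
import os

# Fail fast if the Sentinel Hub OAuth credentials are missing
assert os.getenv("SH_CLIENT_ID") and os.getenv("SH_CLIENT_SECRET"), \
    "SH_CLIENT_ID / SH_CLIENT_SECRET not found in the environment"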
data_connector = "sentinelhub"
dc = DataConnector(connector_type=data_connector)
dc.connector.list_collections()
collection_name = "s2_l2a"
bands = ["B04", "B03", "B02"]
unique_dates, results = dc.connector.find_data(
    data_collection_name=collection_name,
    date_start="2024-01-01",
    date_end="2024-01-31",
    bbox=bbox,
)
print(unique_dates)
save_filestem = f"./tmp_download/{data_connector}_{collection_name}"
da = dc.connector.get_data(
    data_collection_name=collection_name,
    date_start="2024-01-01",
    date_end="2024-01-31",
    bbox=bbox,
    bands=bands,
    save_file=f"{save_filestem}.tif",
)
dai = scale_data_xarray(da, list(np.ones(len(bands))))
dai = impute_nans_xarray(dai)
save_data_array_to_file(dai, save_file=f"{save_filestem}.tif", imputed=True)
Sentinel-1_grd from Sentinel Hub
collection_name = "s1_grd"
bands = ["VV", "VH"]
unique_dates, results = dc.connector.find_data(
    data_collection_name=collection_name,
    date_start="2024-01-01",
    date_end="2024-01-31",
    bbox=bbox,
)
print(unique_dates)
date = unique_dates[0]  # query just the first date with available data
save_filestem = f"./tmp_download/{data_connector}_{collection_name}"
da = dc.connector.get_data(
    data_collection_name=collection_name,
    date_start=date,
    date_end=date,
    bbox=bbox,
    bands=bands,
    save_file=f"{save_filestem}.tif",
)
dai = scale_data_xarray(da, list(np.ones(len(bands))))
dai = impute_nans_xarray(dai)
save_data_array_to_file(dai, save_file=f"{save_filestem}.tif", imputed=True)
HLS-L30 from NASA Earthdata
Before connecting to the NASA Earthdata connector, ensure credentials have been added to your local environment. The easiest way to do this is to add them to your .env file. Log in to urs.earthdata.nasa.gov to generate a token, then add it to the .env file:
# .env
NASA_EARTH_BEARER_TOKEN=<your_token_here>
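As before, a quick check that the token is visible to the process can save a failed request later:
import os

# Fail fast if the Earthdata bearer token was not loaded from .env
assert os.getenv("NASA_EARTH_BEARER_TOKEN"), "NASA_EARTH_BEARER_TOKEN is not set"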
data_connector = "nasa_earthdata"
dc = DataConnector(connector_type=data_connector)
dc.connector.list_collections()
collection_name = "HLSL30_2.0"
bands = ["B04", "B03", "B02"]
unique_dates, results = dc.connector.find_data(
    data_collection_name=collection_name,
    date_start="2024-01-01",
    date_end="2024-01-31",
    bbox=bbox,
)
print(unique_dates)
date = unique_dates[0]  # as above, query the first date with available data
save_filestem = f"./tmp_download/{data_connector}_{collection_name}"
da = dc.connector.get_data(
    data_collection_name=collection_name,
    date_start=date,
    date_end=date,
    bbox=bbox,
    bands=bands,
    save_file=f"{save_filestem}.tif",
)
dai = scale_data_xarray(da, list(np.ones(len(bands))))
dai = impute_nans_xarray(dai)
save_data_array_to_file(dai, save_file=f"{save_filestem}.tif", imputed=True)
Sentinel-5p-l3grd-ch4-wfmd from IBM Research STAC
Before connecting to the IBM Research STAC data connector, ensure credentials have been added to your local environment. The easiest way to do this is to add them to your .env file:
# .env
APPID_ISSUER=<issuer>
APPID_USERNAME=<user-email>
APPID_PASSWORD=<user-password>
CLIENT_ID=<client-id>
CLIENT_SECRET=<client-secret>
data_connector = "IBMResearchSTAC"
dc = DataConnector(connector_type=data_connector)
dc.connector.list_collections()
collection_name = "sentinel-5p-l3grd-ch4-wfmd"
date_start = "2024-01-19"
date_end = "2024-01-21"
bands = ["CH4_column_volume_mixing_ratio"]
bbox = [-102.3, 31.5, -101.7, 32.1]
unique_dates, results = dc.connector.find_data(
    data_collection_name=collection_name,
    date_start=date_start,
    date_end=date_end,
    bands=bands,
    bbox=bbox,
)
print(unique_dates)
file_path = f"./tmp_download/{data_connector}_{collection_name}.nc"
da = dc.connector.get_data(
    data_collection_name=collection_name,
    date_start=date_start,
    date_end=date_end,
    bbox=bbox,
    bands=bands,
    save_file=file_path,
)
print(file_path)
ds = xr.open_dataset(file_path)
ds
variable = list(ds)[0]  # take the first data variable in the dataset
ds[variable].isel(bands=0, time=2).plot()  # first band, third timestep
Example for forecast temperature from The Weather Company
Before connecting to The Weather Company data connector, ensure credentials have been added to your local environment. The easiest way to do this is to add them to your .env file:
# .env
THE_WEATHER_COMPANY_API_KEY=<your_api_key_here>
data_connector = "TheWeatherCompany"
dc = DataConnector(connector_type=data_connector)
dc.connector.list_collections()
collection_name = "weathercompany-daily-forecast"
start_timestamp = pd.Timestamp.today().date()
date_start = start_timestamp.isoformat()
end_timestamp = start_timestamp + pd.Timedelta(15, unit="D")  # 15-day forecast window
date_end = end_timestamp.isoformat()
bands = ["temperatureMax"]
bbox = (-102.3, 31.5, -101.7, 32.1)
unique_dates, results = dc.connector.find_data(
    data_collection_name=collection_name,
    date_start=date_start,
    date_end=date_end,
    bands=bands,
    bbox=bbox,
)
print(unique_dates)
data_dir = Path(".") / "tmp_download"
if not data_dir.exists():
    data_dir.mkdir()
save_filestem = f"./tmp_download/{data_connector}_{collection_name}"
da = dc.connector.get_data(
    data_collection_name=collection_name,
    date_start=date_start,
    date_end=date_end,
    bbox=bbox,
    bands=bands,
    save_file=f"{save_filestem}.nc",
)
for file_path in data_dir.rglob("*.nc"):
    if file_path.is_file():
        print(file_path)
ds = xr.open_dataset("tmp_download/TheWeatherCompany_weathercompany-daily-forecast.nc")
ds
ds["__xarray_dataarray_variable__"].isel(bands=0, time=1).plot()