Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Subsetting to bounding box is now also done in the `s3` and `files` code paths [!11](https://github.com/dmidk/sunflow/pull/11), @JoachimKoenigslieb
- Subsetting to bounding box now correctly handles both ascending and descending lat/lon coordinates [!11](https://github.com/dmidk/sunflow/pull/11), @JoachimKoenigslieb

### Changed

- Pass down number of ensembles from configurations to `ProbabilisticAdvection`. This gives a roughly 3x speedup in the no ensembles case [!12](https://github.com/dmidk/sunflow/pull/12), @JoachimKoenigslieb
Expand Down
11 changes: 9 additions & 2 deletions sunflow/data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from loguru import logger

from .config import NowcastConfig, S3Config
from .geospatial import subset_to_bbox
from .validation import DataNotAvailableError


Expand Down Expand Up @@ -192,6 +193,7 @@ def load_data_from_files(
satellite_data_directory: str,
data_type: str,
filename_format: str,
bbox: str,
) -> xr.Dataset:
"""Load time steps from files (operational mode).

Expand Down Expand Up @@ -232,7 +234,8 @@ def load_data_from_files(

if not collected:
return xr.Dataset()
return xr.concat(collected, dim="time", data_vars=None)
ds = xr.concat(collected, dim="time", data_vars=None)
return subset_to_bbox(ds, bbox)


def check_current_data_existence_s3(
Expand Down Expand Up @@ -281,6 +284,7 @@ def load_data_from_s3(
s3_config: S3Config,
data_type: str,
filename_format: str,
bbox: str,
) -> xr.Dataset:
"""Load time steps from S3.

Expand Down Expand Up @@ -325,7 +329,8 @@ def load_data_from_s3(

if not collected:
return xr.Dataset()
return xr.concat(collected, dim="time", data_vars=None)
ds = xr.concat(collected, dim="time", data_vars=None)
return subset_to_bbox(ds, bbox)


def fetch_clearsky_with_fallback(
Expand Down Expand Up @@ -392,6 +397,7 @@ def fetch_clearsky_with_fallback(
nowcast_config.satellite_data_directory,
"clearsky data",
config["filename_format"],
bbox=bbox,
)
case "s3":
fetched = load_data_from_s3(
Expand All @@ -401,6 +407,7 @@ def fetch_clearsky_with_fallback(
s3_config,
"clearsky data",
config["filename_format"],
bbox=bbox,
)

if offset > 0:
Expand Down
39 changes: 1 addition & 38 deletions sunflow/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from loguru import logger

from .data_io import generate_input_filename
from .geospatial import subset_to_bbox


def download_netcdf_knmi(url: str, api_key: str) -> BytesIO:
Expand Down Expand Up @@ -66,44 +67,6 @@ def generate_dwd_filename(timestamp: datetime) -> str:
return f"SISin{timestamp.strftime('%Y%m%d%H%M')}FDv3.nc"


def subset_to_bbox(ds: xr.Dataset, bbox: str) -> xr.Dataset:
"""Subset an xarray Dataset to a geographic bounding box.

Detects the latitude and longitude coordinate names automatically by
matching against common aliases ('lat', 'latitude', 'y' for
latitude; 'lon', 'longitude', 'x' for longitude). If coordinates
cannot be identified, the original dataset is returned unchanged with
a warning.

Args:
ds: Input dataset to subset.
bbox: Bounding box string in the format lon_min,lat_min,lon_max,lat_max.

Returns:
Dataset sliced to the requested bounding box, or the original
dataset if lat/lon coordinates could not be found.
"""
lon_min, lat_min, lon_max, lat_max = map(float, bbox.split(","))

lat_coord = lon_coord = None
for coord in ds.coords:
if coord.lower() in ["lat", "latitude", "y"]:
lat_coord = coord
elif coord.lower() in ["lon", "longitude", "x"]:
lon_coord = coord

if lat_coord is None or lon_coord is None:
logger.warning("Could not find lat/lon coordinates")
return ds

return ds.sel(
{
lat_coord: slice(lat_min, lat_max),
lon_coord: slice(lon_min, lon_max),
}
)


def download_current_data(
request_time: datetime,
config: dict[str, Any],
Expand Down
35 changes: 35 additions & 0 deletions sunflow/geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,45 @@
import numpy as np
import pvlib
import xarray as xr
from loguru import logger

from .config import BBOX_OPTIONS


def subset_to_bbox(ds: xr.Dataset, bbox: str) -> xr.Dataset:
"""Subset an xarray Dataset to a geographic bounding box."""
lon_min, lat_min, lon_max, lat_max = map(float, bbox.split(","))

lat_coord = lon_coord = None
for coord in ds.coords:
if coord.lower() in ["lat", "latitude", "y"]:
lat_coord = coord
elif coord.lower() in ["lon", "longitude", "x"]:
lon_coord = coord

if lat_coord is None or lon_coord is None:
logger.warning("Could not find lat/lon coordinates for bbox subsetting")
return ds

# Handle potentially inverted coordinates by detecting order
lat_values = ds[lat_coord].values
lon_values = ds[lon_coord].values

lat_ascending = lat_values[0] < lat_values[-1]
lon_ascending = lon_values[0] < lon_values[-1]

# Arrange slice bounds based on coordinate order
lat_slice = slice(lat_min, lat_max) if lat_ascending else slice(lat_max, lat_min)
lon_slice = slice(lon_min, lon_max) if lon_ascending else slice(lon_max, lon_min)

return ds.sel(
{
lat_coord: lat_slice,
lon_coord: lon_slice,
}
)


def get_bbox(bbox_choice: str, custom_bbox: str | None = None) -> str | None:
"""Return the bounding box string for a given bbox choice.

Expand Down
2 changes: 2 additions & 0 deletions sunflow/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def run_nowcast(
nowcast_config.satellite_data_directory,
"past data",
config["filename_format"],
bbox=bbox,
)
case "s3":
data = load_data_from_s3(
Expand All @@ -251,6 +252,7 @@ def run_nowcast(
s3_config,
"past data",
config["filename_format"],
bbox=bbox,
)

n_loaded = len(data.time) if "time" in data.coords else 0
Expand Down
Loading