import concurrent.futures
import glob
import math
import os
import tempfile

import numpy as np
import pandas as pd
import rasterio
from rasterio.transform import from_bounds
from rasterio.warp import reproject, Resampling

# Import your own helper to open datasets.
from data_utils import open_dataset_file
# Define the working directory and a cache directory.
directory = "F:/Github/ULC/ioniq-geography/gis/data/copernicus/CZ_TRAVNY_ENSEMBLE_DAILYMEAN_02_2025/"
cache_directory = os.path.join(tempfile.gettempdir(), "cache")
os.makedirs(cache_directory, exist_ok=True)

# Get list of GRIB/NetCDF files and create symlinks in the cache.
files = [file for file in glob.glob(os.path.join(directory, "*"))
         if file.endswith(('.nc', '.grib'))]

# Symlink each data file into the cache directory; the loop header and the
# accumulator were missing in the original, leaving the body orphaned.
# NOTE(review): os.symlink on Windows may require admin rights / developer
# mode — confirm, or fall back to shutil.copy if it raises OSError.
cached_files = []
for file in files:
    cache_path = os.path.join(cache_directory, os.path.basename(file))
    if not os.path.exists(cache_path):
        os.symlink(file, cache_path)
    cached_files.append(cache_path)
def process_variable(ds, var_name, file):
    """Resample one variable of an opened dataset into a normalized flipbook GeoTIFF.

    For a given already opened dataset and a variable name:
    - Trim to the specified bounding box.
    - Resample each time frame to an 8x8 grid.
    - Compute the global min/max (across all time frames) and normalize each frame.
    - Assemble the normalized frames into a flipbook.
    - Save the flipbook as a GeoTIFF.
    - Return important properties as a dictionary.

    Parameters
    ----------
    ds : opened dataset (xarray-like) with 'longitude'/'latitude' coords.
    var_name : name of the data variable in ``ds`` to process.
    file : path of the originating file, used for the output filename.

    Returns
    -------
    dict of properties describing the processed variable.
    """
    da = ds[var_name]

    # Determine the time coordinate (some products use 'valid_time').
    time_dim = "time" if "time" in da.dims else "valid_time"
    time_values = da[time_dim].values
    num_frames = len(time_values)

    # --- Define destination grid parameters ---
    # Bounding box: left, bottom, right, top.
    left, bottom, right, top = 17.86595452, 49.14841321, 19.12404888, 49.95775356
    # Destination dimensions (8x8 pixels).
    dst_width, dst_height = 8, 8
    dst_transform = from_bounds(left, bottom, right, top, dst_width, dst_height)

    # --- Build source grid transform from dataset's coordinates ---
    lons = ds['longitude'].values
    lats = ds['latitude'].values
    src_width = len(lons)
    src_height = len(lats)
    # Assuming lats are in descending order; min/max is order-independent.
    src_left, src_right = float(lons.min()), float(lons.max())
    src_bottom, src_top = float(lats.min()), float(lats.max())
    src_transform = from_bounds(src_left, src_bottom, src_right, src_top, src_width, src_height)

    # --- Process each time frame: reproject to the 8x8 grid ---
    # NOTE(review): assumes source and destination are both geographic
    # (EPSG:4326) — confirm against the product metadata.
    reprojected_frames = []
    for t in time_values:
        src_array = da.sel(**{time_dim: t}).values.astype(np.float32)
        dst_array = np.empty((dst_height, dst_width), dtype=np.float32)
        reproject(
            source=src_array,
            destination=dst_array,
            src_transform=src_transform,
            src_crs="EPSG:4326",
            dst_transform=dst_transform,
            dst_crs="EPSG:4326",
            resampling=Resampling.bilinear,
        )
        reprojected_frames.append(dst_array)

    # --- Compute global min and max across all frames ---
    all_frames = np.stack(reprojected_frames, axis=0)
    global_min = float(np.min(all_frames))
    global_max = float(np.max(all_frames))

    # --- Normalize each frame using the global min/max ---
    # Guard against a constant field (max == min) to avoid division by zero.
    value_range = global_max - global_min
    if value_range == 0:
        value_range = 1.0
    normalized_frames = [
        (frame - global_min) / value_range for frame in reprojected_frames
    ]

    # --- Assemble normalized frames into a flipbook ---
    # For this example, we lay out the frames in a square grid.
    cols = math.ceil(math.sqrt(num_frames))
    rows = math.ceil(num_frames / cols)
    orig_width = cols * dst_width
    orig_height = rows * dst_height
    flipbook = np.zeros((orig_height, orig_width), dtype=np.float32)
    for idx, frame in enumerate(normalized_frames):
        # Row-major placement: frame idx goes to grid cell (r, c).
        r, c = divmod(idx, cols)
        flipbook[r*dst_height:(r+1)*dst_height, c*dst_width:(c+1)*dst_width] = frame

    # --- Write the flipbook as a GeoTIFF ---
    start_date = str(time_values[0])[:10]
    end_date = str(time_values[-1])[:10]
    out_filename = f"{os.path.splitext(os.path.basename(file))[0]}__{var_name}__{rows}x{cols}_{start_date}_{end_date}.tif"
    out_file = os.path.join(directory, out_filename)
    with rasterio.open(
        out_file,
        "w",
        driver="GTiff",
        height=flipbook.shape[0],
        width=flipbook.shape[1],
        count=1,
        dtype=str(flipbook.dtype),
        crs="EPSG:4326",
        transform=dst_transform  # Using the destination transform as a placeholder.
    ) as dst:
        dst.write(flipbook, 1)
    print(f"Processed variable '{var_name}' from file '{file}'")

    # Prepare properties to store in the CSV.
    return {
        "file": os.path.basename(file),
        "variable": var_name,
        "start_date": start_date,
        "end_date": end_date,
        "global_min": global_min,
        "global_max": global_max,
        "num_frames": num_frames,
        "rows": rows,
        "cols": cols,
        "dst_width": dst_width,
        "dst_height": dst_height,
    }
# Process each file sequentially; for each file, process variables in parallel.
# Threads are appropriate here: rasterio/GDAL releases the GIL during I/O.
all_properties = []
for file in cached_files:
    print("Processing file:", file)
    ds = open_dataset_file(file)
    futures = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for var_name in ds.data_vars:
            futures.append(executor.submit(process_variable, ds, var_name, file))
        for future in concurrent.futures.as_completed(futures):
            # One failing variable should not abort the whole file.
            try:
                props = future.result()
                all_properties.append(props)
            except Exception as e:
                print("Error processing variable:", e)

# Save the variable properties to a CSV file.
df = pd.DataFrame(all_properties)
csv_file = os.path.join(directory, "variable_properties.csv")
df.to_csv(csv_file, index=False)
print("Saved variable properties CSV:", csv_file)