# Copernicus weather-data flipbook generator: trims variables to an area
# bounding box, resamples each time frame to 8x8, and writes flipbook GeoTIFFs.
import datetime
import glob
import math
import os
import tempfile
import concurrent.futures
import pandas as pd
import xarray as xr
import numpy as np
import rasterio
from rasterio.transform import from_bounds
from rasterio.warp import reproject, Resampling
from tqdm import tqdm
from constants import Area
from data_utils import open_dataset_file
# --- Run configuration -------------------------------------------------------
LOCATION = Area.CZ_TRAVNY_EXTENDED
VARIABLE = "TEMPERATURE"
TIME_VAR = "valid_time"

# Per-area processing window and bounding box.
# DATES: (start, end) inclusive datetime window for frames to keep.
# BOX: (top, left, bottom, right) in degrees — note this ordering is consumed
#      as `top, left, bottom, right = bbox` by the processing functions.
if LOCATION is Area.BR_VICOSA_EXTENDED:
    DATES = (datetime.datetime(2025, 2, 18), datetime.datetime(2025, 3, 28))
    BOX = (-20.3, -43.40, -21.30, -42.40)
elif LOCATION is Area.CZ_TRAVNY_EXTENDED:
    DATES = (datetime.datetime(2025, 2, 4), datetime.datetime(2025, 3, 21))
    BOX = (50.60, 17.80, 49.00, 19.40)
elif LOCATION is Area.SK_GANGWON_EXTENDED:
    DATES = (datetime.datetime(2025, 2, 24), datetime.datetime(2025, 3, 21))
    BOX = (38.25, 127.50, 37.35, 128.60)
# --- Input discovery and local caching ---------------------------------------
root = "F:/Github/ULC/ioniq-geography/gis/data/copernicus/"
directory = f"{root}{LOCATION.name}_{VARIABLE}_2025-2-01-2025-3-29/"

# Symlink the inputs into a temp-dir cache so downstream readers see short,
# local paths.  NOTE(review): `cached_files` is built but the driver below
# opens `files` directly — confirm which list is intended.
cache_directory = os.path.join(tempfile.gettempdir(), "cache")
os.makedirs(cache_directory, exist_ok=True)

# Only NetCDF / GRIB payloads; ignore any other files in the directory.
files = [file for file in glob.glob(os.path.join(directory, "*"))
         if file.endswith(('.nc', '.grib'))]

cached_files = []
for file in files:
    cache_path = os.path.join(cache_directory, os.path.basename(file))
    if not os.path.exists(cache_path):
        # May raise OSError on Windows without symlink privilege — TODO confirm.
        os.symlink(file, cache_path)
    cached_files.append(cache_path)
def process_variable(ds, var_name, file, bbox):
    """
    Process a single variable from an already opened dataset:
    - Trims to the given bounding box.
    - Resamples each time frame to an 8x8 grid.
    - Assembles the frames into a flipbook laid out on an 8-column grid.
    - Saves each frame and the flipbook as GeoTIFFs.

    Parameters
    ----------
    ds : xarray.Dataset
        Open dataset containing `var_name` plus `longitude`/`latitude` coords.
    var_name : str
        Name of the data variable to process.
    file : str
        Source file path; its basename seeds the output filenames.
    bbox : tuple
        (top, left, bottom, right) bounding box in degrees.

    Returns
    -------
    dict
        Properties of the produced flipbook (dates, min/max, layout) for the
        summary CSV.
    """
    da = ds[var_name]
    # Determine which time coordinate is available.
    time_dim = "time" if "time" in da.dims else "valid_time"
    time_values = da[time_dim].values
    num_frames = len(time_values)

    # Target bounding box; consumed as top, left, bottom, right.
    top, left, bottom, right = bbox

    # Destination grid dimensions (8x8 pixels).
    dst_width, dst_height = 8, 8
    dst_transform = from_bounds(left, bottom, right, top, dst_width, dst_height)

    # Build source transform from the dataset's longitude/latitude.
    # Assumes a regular lon/lat grid with latitude descending — TODO confirm.
    lons = ds['longitude'].values
    lats = ds['latitude'].values
    src_width, src_height = len(lons), len(lats)
    src_left, src_right = float(np.min(lons)), float(np.max(lons))
    src_bottom, src_top = float(np.min(lats)), float(np.max(lats))
    src_transform = from_bounds(src_left, src_bottom, src_right, src_top,
                                src_width, src_height)

    # Process each time frame: reproject onto the 8x8 destination grid.
    resampled_frames = []
    for t in time_values:
        src_array = da.sel(**{time_dim: t}).values.astype(np.float32)
        dst_array = np.empty((dst_height, dst_width), dtype=np.float32)
        reproject(
            source=src_array,
            destination=dst_array,
            src_transform=src_transform,
            src_crs='EPSG:4326',
            dst_transform=dst_transform,
            dst_crs='EPSG:4326',
            resampling=Resampling.bilinear,
        )
        resampled_frames.append(dst_array)

        # Write this frame as its own GeoTIFF under a per-variable folder,
        # e.g. "temperature_2025-03-18.tif".
        os.makedirs(os.path.join(directory, var_name), exist_ok=True)
        out_file = os.path.join(
            directory, var_name,
            f"{os.path.splitext(os.path.basename(file))[0]}__{var_name}__{str(t)[:10]}.tif")
        with rasterio.open(
                out_file, 'w', driver='GTiff',
                height=dst_array.shape[0],
                width=dst_array.shape[1],
                count=1,
                dtype=dst_array.dtype,
                crs='EPSG:4326',  # adjust CRS if needed
                transform=dst_transform) as dst:
            dst.write(dst_array, 1)

    # Assemble the flipbook from the resampled frames: 8 columns, as many
    # rows as needed.
    cols = 8  # math.ceil(math.sqrt(num_frames))
    rows = math.ceil(num_frames / cols)

    # Compute padded dimensions (power of two).  NOTE(review): the padded
    # sizes are computed but the flipbook below is allocated at the unpadded
    # size — confirm whether padding was intended to be applied.
    def next_power_of_two(n):
        return 2 ** math.ceil(math.log2(n))

    orig_width = cols * dst_width
    orig_height = rows * dst_height
    flipbook_width = next_power_of_two(orig_width)
    flipbook_height = next_power_of_two(orig_height)

    # --- Compute global min and max across all frames ---
    all_frames = np.stack(resampled_frames, axis=0)
    global_min = float(np.min(all_frames))
    global_max = float(np.max(all_frames))

    # --- Normalize each frame using the global min/max ---
    # Wind components (u10/v10) keep their signed raw values.
    if var_name == "u10" or var_name == "v10":
        normalized_frames = resampled_frames
    else:
        normalized_frames = [
            (frame - global_min) / (global_max - global_min)
            for frame in resampled_frames
        ]

    # --- Assemble normalized frames into a flipbook grid ---
    flipbook = np.zeros((orig_height, orig_width), dtype=np.float32)
    for idx, frame in enumerate(normalized_frames):
        r, c = divmod(idx, cols)
        flipbook[r * dst_height:(r + 1) * dst_height,
                 c * dst_width:(c + 1) * dst_width] = frame

    # --- Write the flipbook as a GeoTIFF ---
    # Filename combines the file base, variable name, layout and time range.
    start_date = str(time_values[0])[:10]
    end_date = str(time_values[-1])[:10]
    out_filename = (f"{os.path.splitext(os.path.basename(file))[0]}__{var_name}"
                    f"__{rows}x{cols}_{start_date}_{end_date}.tif")
    out_file = os.path.join(directory, out_filename)
    with rasterio.open(
            out_file, 'w', driver='GTiff',
            height=flipbook.shape[0],
            width=flipbook.shape[1],
            count=1,
            dtype=flipbook.dtype,
            crs='EPSG:4326',
            transform=dst_transform  # Using the destination transform as a placeholder.
    ) as dst:
        dst.write(flipbook, 1)

    # Prepare properties to store in the CSV.
    properties = {
        "file": os.path.basename(file),
        "variable": var_name,
        "start_date": start_date,
        "end_date": end_date,
        "global_min": global_min,
        "global_max": global_max,
        "num_frames": num_frames,
        "rows": rows,
        "cols": cols,
        "dst_width": dst_width,
        "dst_height": dst_height,
    }
    print(f"Saved flipbook for variable '{var_name}' from file '{file}' as {out_file}")
    return properties
def process_variable_multi_ds(_datasets, _var_name, file, bbox, dates=None):
    """
    Process a single variable across multiple already opened datasets:
    - Trims to the given bounding box.
    - Resamples each time frame to an 8x8 grid.
    - Optionally filters frames to the `dates` window.
    - Assembles all frames into one flipbook and saves it as a GeoTIFF.

    Parameters
    ----------
    _datasets : list
        Open xarray datasets, each holding `_var_name` and lon/lat coords.
    _var_name : str
        Name of the data variable to process.
    file : str
        Source file path; its basename seeds the output filename.
    bbox : tuple
        (top, left, bottom, right) bounding box in degrees.
    dates : tuple or None
        Optional (start, end) datetime window; frames outside it are skipped.

    Returns
    -------
    dict
        Properties of the produced flipbook for the summary CSV.
    """
    top, left, bottom, right = bbox

    # Destination grid dimensions (8x8 pixels).
    dst_width, dst_height = 8, 8
    dst_transform = from_bounds(left, bottom, right, top, dst_width, dst_height)

    # Accumulators across all datasets.
    total_frames = []
    total_time_values = []
    total_frame_count = 0

    # Process each dataset and append to the accumulators.
    for dataset in _datasets:
        da = dataset[_var_name]
        # Determine which time coordinate is available.
        time_dim = "time" if "time" in da.dims else "valid_time"
        time_values = da[time_dim].values

        # Build source transform from the dataset's longitude/latitude.
        # Assumes a regular lon/lat grid with latitude descending — TODO confirm.
        lons = dataset['longitude'].values
        lats = dataset['latitude'].values
        src_width, src_height = len(lons), len(lats)
        src_left, src_right = float(np.min(lons)), float(np.max(lons))
        src_bottom, src_top = float(np.min(lats)), float(np.max(lats))
        src_transform = from_bounds(src_left, src_bottom, src_right, src_top,
                                    src_width, src_height)

        # Process each time frame.
        resampled_frames = []
        real_time_values = []
        for t in time_values:
            # Convert ns-epoch timestamp to datetime.datetime.
            # NOTE(review): fromtimestamp uses the local timezone — confirm UTC
            # is not required here.
            dt = datetime.datetime.fromtimestamp(t.tolist() / 1e9)
            if dates is not None and (dt < dates[0] or dt > dates[1]):
                print(f"Skipping {dt.date()}")
                continue
            src_array = da.sel(**{time_dim: t}).values.astype(np.float32)
            dst_array = np.empty((dst_height, dst_width), dtype=np.float32)
            reproject(
                source=src_array,
                destination=dst_array,
                src_transform=src_transform,
                src_crs='EPSG:4326',
                dst_transform=dst_transform,
                dst_crs='EPSG:4326',
                resampling=Resampling.bilinear,
            )
            resampled_frames.append(dst_array)
            real_time_values.append(t)

        real_num_frames = len(resampled_frames)
        total_frame_count += real_num_frames
        total_time_values.extend(real_time_values)
        total_frames.extend(resampled_frames)

    # Assemble the flipbook: 8 columns, as many rows as needed.
    cols = 8  # math.ceil(math.sqrt(num_frames))
    rows = math.ceil(total_frame_count / cols)

    # Compute padded dimensions (power of two).  NOTE(review): computed but
    # the flipbook below is allocated at the unpadded size — confirm intent.
    def next_power_of_two(n):
        return 2 ** math.ceil(math.log2(n))

    orig_width = cols * dst_width
    orig_height = rows * dst_height
    flipbook_width = next_power_of_two(orig_width)
    flipbook_height = next_power_of_two(orig_height)

    # --- Compute global min and max across all frames ---
    all_frames = np.stack(total_frames, axis=0)
    global_min = float(np.min(all_frames))
    global_max = float(np.max(all_frames))
    print(global_min, global_max)

    # --- Normalize each frame using the global min/max ---
    # Wind components (u10/v10) keep their signed raw values.
    if _var_name == "u10" or _var_name == "v10":
        normalized_frames = total_frames
    else:
        normalized_frames = [
            (frame - global_min) / (global_max - global_min)
            for frame in total_frames
        ]

    # --- Assemble normalized frames into a flipbook grid ---
    flipbook = np.zeros((orig_height, orig_width), dtype=np.float32)
    for idx, frame in enumerate(normalized_frames):
        r, c = divmod(idx, cols)
        flipbook[r * dst_height:(r + 1) * dst_height,
                 c * dst_width:(c + 1) * dst_width] = frame

    # --- Write the flipbook as a GeoTIFF ---
    # Filename combines the file base, variable name, layout and time range.
    start_date = str(total_time_values[0])[:10]
    # Bugfix: end_date must come from the kept frames, not the last dataset's
    # raw (unfiltered) time axis.
    end_date = str(total_time_values[-1])[:10]
    out_filename = (f"{os.path.splitext(os.path.basename(file))[0]}__{_var_name}"
                    f"__{rows}x{cols}_{start_date}_{end_date}.tif")
    out_file = os.path.join(directory, out_filename)
    with rasterio.open(
            out_file, 'w', driver='GTiff',
            height=flipbook.shape[0],
            width=flipbook.shape[1],
            count=1,
            dtype=flipbook.dtype,
            crs='EPSG:4326',
            transform=dst_transform  # Using the destination transform as a placeholder.
    ) as dst:
        dst.write(flipbook, 1)

    # Prepare properties to store in the CSV.
    properties = {
        "file": os.path.basename(file),
        "variable": _var_name,
        "start_date": start_date,
        "end_date": end_date,
        "global_min": global_min,
        "global_max": global_max,
        "num_frames": total_frame_count,
        "rows": rows,
        "cols": cols,
        "dst_width": dst_width,
        "dst_height": dst_height,
    }
    print(f"Saved flipbook for variable '{_var_name}' from file '{file}' as {out_file}")
    return properties
# --- Driver: fan out one task per data variable, collect properties ----------
all_properties = []
datasets = [open_dataset_file(f) for f in tqdm(files, desc="Opening files")]
# Concatenate along the time axis only to enumerate the variable names.
ds = xr.concat(datasets, dim=TIME_VAR)

futures = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    for var_name in ds.data_vars:
        # NOTE(review): `file` here is the leftover loop variable from the
        # caching loop above (i.e. the last input file); it only seeds the
        # output filename, but confirm that is intended.
        futures.append(executor.submit(
            process_variable_multi_ds, datasets, var_name, file, BOX, DATES))
    for future in concurrent.futures.as_completed(futures):
        try:
            props = future.result()
            if props is not None:
                all_properties.append(props)
        except Exception as e:
            # Keep processing remaining variables; report the failure.
            print("Error processing variable:", e)

# Persist per-variable flipbook metadata for downstream consumers.
df = pd.DataFrame(all_properties)
csv_file = os.path.join(directory, "variable_properties.csv")
df.to_csv(csv_file, index=False)
print("Saved variable properties CSV:", csv_file)