# Requirement: only proceed with the data download if the tile does not already exist locally.
import subprocess
import boto3
import os
import pandas as pd
# Public S3 bucket and key prefix the tiles are fetched from.
bucket_name = "dataforgood-fb-data"
prefix = "forests/v1/alsgedi_global_v6_float/chm/"

# Local folder the tiles are written to, and the CSV listing the wanted tiles.
output_folder = "../../geo/canopy/"
tile_file = "../../geo/canopy/filtered_tile_list.csv"  # Update with the path to your file


def load_tile_ids(csv_path):
    """Return the distinct, non-missing tile IDs from the ``tile`` column.

    Args:
        csv_path: Path to a CSV file that has a ``tile`` column.

    Returns:
        A list of tile IDs as strings, in order of first appearance.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If the CSV has no ``tile`` column.
    """
    # Read the column as text so each ID is used exactly as written in the
    # file; numeric inference would drop leading zeros, or turn the IDs into
    # floats ("123.0") as soon as one row is blank.
    tile_data = pd.read_csv(csv_path, dtype={"tile": str})
    return tile_data["tile"].dropna().drop_duplicates().tolist()


def tile_s3_path(tile_id):
    """Return the ``s3://`` URI of the GeoTIFF for ``tile_id``."""
    return f"s3://{bucket_name}/{prefix}{tile_id}.tif"


def download_tile(tile_id, dest_folder=output_folder):
    """Download one tile with the AWS CLI unless it is already on disk.

    Args:
        tile_id: Tile identifier; the file is named ``<tile_id>.tif``.
        dest_folder: Folder the tile is written to.

    Returns:
        ``"skipped"`` if the file already exists locally, ``"downloaded"``
        if the AWS CLI succeeded, ``"failed"`` if it exited non-zero.
    """
    tile_filename = f"{tile_id}.tif"
    local_path = os.path.join(dest_folder, tile_filename)

    # Only proceed with the download if the tile does not exist yet.
    if os.path.isfile(local_path):
        print(f"Skipping {tile_filename}: already exists at {local_path}")
        return "skipped"

    print(f"Downloading {tile_filename} to {local_path}...")
    try:
        # Run the AWS CLI command; --no-sign-request performs the request
        # without credentials.
        subprocess.run(
            ["aws", "s3", "cp", tile_s3_path(tile_id), local_path, "--no-sign-request"],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Best effort: report the failure and let the caller move on.
        print(f"Failed to download {tile_filename}: {e}")
        return "failed"

    print(f"Downloaded: {tile_filename}")
    return "downloaded"


def main():
    """Download every tile listed in ``tile_file`` into ``output_folder``."""
    os.makedirs(output_folder, exist_ok=True)
    for tile_id in load_tile_ids(tile_file):
        download_tile(tile_id)
    print("Tile download completed.")


if __name__ == "__main__":
    main()