Work With Cloud Storage#
This notebook provides a comprehensive guide for interacting with cloud storage services, including Amazon S3 and Google Cloud Storage (GCS). It demonstrates how to upload, download, and generate presigned URLs for files, with a focus on handling geospatial data such as Cloud Optimized GeoTIFFs (COGs). The notebook covers the following workflows:
Amazon S3 Integration:
Configures an S3 client using `boto3` with credentials loaded from environment variables.
Demonstrates file uploads to S3 buckets with public-read access and multipart upload configurations.
Shows how to generate presigned URLs for secure, temporary access to files.
Includes functionality to download files from S3.
Google Cloud Storage (GCS) Integration:
Configures a GCS client using service account credentials.
Demonstrates file uploads to GCS buckets with optional public access.
Shows how to generate signed URLs for secure, temporary access to files.
Includes functionality to download files from GCS.
Geospatial Data Handling:
Iterates through a list of Cloud Optimized GeoTIFF (COG) files and uploads them to both S3 and GCS.
Generates presigned URLs for accessing the uploaded files.
This notebook provides reusable code snippets and explanations to simplify cloud storage operations, making it easier to manage geospatial data in cloud environments.
Import required libraries#
import os
import glob
import logging
from dotenv import load_dotenv
load_dotenv("../.env")
True
Work With S3 Storage#
# Set botocore's checksum behaviour to "when_required" for both requests and
# responses. These are set before boto3 is imported/used below.
# NOTE(review): presumably needed because the target is an S3-compatible,
# non-AWS endpoint that rejects the default checksum headers — confirm.
os.environ["AWS_REQUEST_CHECKSUM_CALCULATION"]="when_required"
os.environ["AWS_RESPONSE_CHECKSUM_VALIDATION"] = "when_required"
import boto3
from boto3.s3.transfer import TransferConfig
from botocore.exceptions import ClientError
# S3 connection settings, read from the process environment (each is None
# when the corresponding variable is not set).
S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY')
S3_SECRET_ACCESS_KEY = os.getenv('S3_SECRET_ACCESS_KEY')
S3_END_POINT = os.getenv('S3_END_POINT')
BUCKET_NAME = os.getenv('BUCKET_NAME')
# Transfer settings handed to upload_file; the multipart threshold is
# 1024 * 1024 * 100 bytes (100 MiB).
config = TransferConfig(multipart_threshold=1024 * 1024 * 100)
def initiate_s3_client(end_point: str, access_key: str, secret_access_key: str):
    """Build a boto3 S3 client for the given endpoint and key pair.

    Args:
        end_point: URL of the S3 (or S3-compatible) service.
        access_key: Access key id used to sign requests.
        secret_access_key: Secret key paired with ``access_key``.

    Returns:
        The client object returned by ``boto3.client("s3", ...)``.
    """
    connection_settings = {
        "endpoint_url": end_point,
        "aws_access_key_id": access_key,
        "aws_secret_access_key": secret_access_key,
    }
    return boto3.client("s3", **connection_settings)
def upload_file_s3(file_name: str, bucket_name: str, prefix: str):
    """Upload a local file to S3 under ``<prefix>/<basename of file_name>``.

    The object is uploaded with the ``public-read`` ACL and the module-level
    transfer ``config``.

    Args:
        file_name: Path of the local file to upload.
        bucket_name: Destination bucket.
        prefix: Key prefix ("folder") placed in front of the file's base name.

    Returns:
        ``(True, "s3://<bucket>/<key>")`` on success, ``(False, None)`` when
        the upload fails with a client or upload error (the error is logged).
    """
    # Imported here so the fix does not rely on a new file-level import.
    from boto3.exceptions import S3UploadFailedError

    s3_client = initiate_s3_client(S3_END_POINT, S3_ACCESS_KEY, S3_SECRET_ACCESS_KEY)
    object_name = os.path.basename(file_name)
    object_key = f"{prefix}/{object_name}"
    try:
        s3_client.upload_file(
            Filename=file_name,
            Bucket=bucket_name,
            Key=object_key,
            ExtraArgs={"ACL": "public-read"},
            Config=config,
        )
    # upload_file re-raises client errors as S3UploadFailedError, so catching
    # ClientError alone would let a failed upload escape as an exception.
    except (ClientError, S3UploadFailedError) as exce:
        logging.error(exce)
        return False, None
    # Build the URI from the exact key that was written; os.path.join is
    # platform-dependent and discards earlier parts when a later part starts
    # with "/", so it could report a location that differs from the real key.
    return True, f"s3://{bucket_name}/{object_key}"
def create_presigned_post_s3(bucket_name: str, object_name: str, expiration: int = 3600):
    """Return a presigned URL for downloading an S3 object.

    Despite the name, the URL is signed for the ``get_object`` operation.

    Args:
        bucket_name: Bucket that holds the object.
        object_name: Full key of the object inside the bucket.
        expiration: Value passed as ``ExpiresIn`` (default 3600).

    Returns:
        The presigned URL, or ``None`` when a ``ClientError`` is raised
        (the error is logged).
    """
    s3_client = initiate_s3_client(S3_END_POINT, S3_ACCESS_KEY, S3_SECRET_ACCESS_KEY)
    request_params = {"Bucket": bucket_name, "Key": object_name}
    try:
        return s3_client.generate_presigned_url(
            "get_object",
            Params=request_params,
            ExpiresIn=expiration,
        )
    except ClientError as err:
        logging.error(err)
        return None
def download_file_from_s3(bucket_name: str, object_key: str, download_path: str):
    """Download one S3 object to a local path.

    Args:
        bucket_name: Bucket that holds the object.
        object_key: Full key of the object inside the bucket.
        download_path: Local file path to write the object to.

    Returns:
        ``(True, download_path)`` on success, ``(False, None)`` when a
        ``ClientError`` is raised (the error is logged).
    """
    s3_client = initiate_s3_client(S3_END_POINT, S3_ACCESS_KEY, S3_SECRET_ACCESS_KEY)
    download_args = {
        "Bucket": bucket_name,
        "Key": object_key,
        "Filename": download_path,
    }
    try:
        s3_client.download_file(**download_args)
    except ClientError as err:
        logging.error(err)
        return False, None
    else:
        return True, download_path
# Collect the paths of all files in ../data whose names end in "_cog.tif".
cog_pattern = "../data/*_cog.tif"
list_cog_files = glob.glob(cog_pattern)
print(list_cog_files)
['../data/T34SFG_20240621T092031_TCI_10m_cog.tif', '../data/T34SFF_20220209T091131_TCI_10m_cog.tif', '../data/T34SEG_20240601T092031_TCI_10m_cog.tif', '../data/T34SGH_20240608T090601_TCI_10m_cog.tif']
# Upload every discovered COG to the S3 bucket and print a presigned
# download URL for each file that uploaded successfully.
for cog_file in list_cog_files:
    file_name = os.path.basename(cog_file)
    prefix = "mohanad/cog-data"
    print(f"Uploading {file_name} to {BUCKET_NAME}/{prefix}")
    success, s3_path = upload_file_s3(cog_file, BUCKET_NAME, prefix)
    if not success:
        print("File upload failed")
        continue
    print(f"File uploaded successfully to {s3_path}")
    url = create_presigned_post_s3(BUCKET_NAME, f"{prefix}/{file_name}")
    print(f"Presigned URL: {url}")
Uploading T34SFG_20240621T092031_TCI_10m_cog.tif to meditwin-training/mohanad/cog-data
File uploaded successfully to s3://meditwin-training/mohanad/cog-data/T34SFG_20240621T092031_TCI_10m_cog.tif
Presigned URL: https://object-store.os-api.cci2.ecmwf.int/meditwin-training/mohanad/cog-data/T34SFG_20240621T092031_TCI_10m_cog.tif?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=d0e4fa7b7d8c4d499ca9c7cf5766859b%2F20250620%2Feu-central-1%2Fs3%2Faws4_request&X-Amz-Date=20250620T144430Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=0629db4deb3751c6908b060c7aed0516d5a3ca33d539fdb55864e249cba1b5b0
Uploading T34SFF_20220209T091131_TCI_10m_cog.tif to meditwin-training/mohanad/cog-data
File uploaded successfully to s3://meditwin-training/mohanad/cog-data/T34SFF_20220209T091131_TCI_10m_cog.tif
Presigned URL: https://object-store.os-api.cci2.ecmwf.int/meditwin-training/mohanad/cog-data/T34SFF_20220209T091131_TCI_10m_cog.tif?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=d0e4fa7b7d8c4d499ca9c7cf5766859b%2F20250620%2Feu-central-1%2Fs3%2Faws4_request&X-Amz-Date=20250620T144521Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=f42ce18ba3e6a70646e1ee8cca8c6704a7f8ac4e2f588e8648ad2e1290203929
Uploading T34SEG_20240601T092031_TCI_10m_cog.tif to meditwin-training/mohanad/cog-data
File uploaded successfully to s3://meditwin-training/mohanad/cog-data/T34SEG_20240601T092031_TCI_10m_cog.tif
Presigned URL: https://object-store.os-api.cci2.ecmwf.int/meditwin-training/mohanad/cog-data/T34SEG_20240601T092031_TCI_10m_cog.tif?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=d0e4fa7b7d8c4d499ca9c7cf5766859b%2F20250620%2Feu-central-1%2Fs3%2Faws4_request&X-Amz-Date=20250620T144640Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=e6b57be846046d9a332d0165f620af9a54a381aea28361b0dd8bd036182cba97
Uploading T34SGH_20240608T090601_TCI_10m_cog.tif to meditwin-training/mohanad/cog-data
File uploaded successfully to s3://meditwin-training/mohanad/cog-data/T34SGH_20240608T090601_TCI_10m_cog.tif
Presigned URL: https://object-store.os-api.cci2.ecmwf.int/meditwin-training/mohanad/cog-data/T34SGH_20240608T090601_TCI_10m_cog.tif?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=d0e4fa7b7d8c4d499ca9c7cf5766859b%2F20250620%2Feu-central-1%2Fs3%2Faws4_request&X-Amz-Date=20250620T144757Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=a7641545c63a3c66cfcaf51864beb51cf5c803a9040ab6b568a68bce2a43fb0a
# Fetch one of the uploaded COGs back from S3 into ../data; the call evaluates
# to (True, download_path) on success or (False, None) on a client error.
download_file_from_s3(BUCKET_NAME, "mohanad/cog-data/T34SGH_20240608T090601_TCI_10m_cog.tif", "../data/downloaded_cog_s3.tif")
(True, '../data/downloaded_cog_s3.tif')
Work With Google Cloud Storage#
from google.cloud import storage
from google.oauth2 import service_account
from datetime import timedelta
# Google Cloud Storage settings, read from the process environment
# (each is None when the corresponding variable is not set).
GCS_CREDENTIALS_PATH = os.environ.get("GCS_CREDENTIALS_PATH")
GCS_PROJECT_ID = os.environ.get("GCS_PROJECT_ID")
GCS_BUCKET_NAME = os.environ.get("GCS_BUCKET_NAME")
# Echo the loaded settings, one per line, so a missing value is easy to spot.
for _setting in (GCS_CREDENTIALS_PATH, GCS_PROJECT_ID, GCS_BUCKET_NAME):
    print(_setting)
def initiate_gcs_client(credentials_path: str):
    """Create a Google Cloud Storage client from a service-account key file.

    Args:
        credentials_path: Path to the service-account file.

    Returns:
        A ``storage.Client`` constructed with the loaded credentials.
    """
    sa_credentials = service_account.Credentials.from_service_account_file(
        credentials_path
    )
    return storage.Client(credentials=sa_credentials)
def upload_file_gcs(file_name: str, bucket_name: str, prefix: str, credentials_path: str = GCS_CREDENTIALS_PATH):
    """Upload a local file to GCS under ``<prefix>/<basename of file_name>``.

    After the upload, ``make_public()`` is called on the blob.

    Args:
        file_name: Path of the local file to upload.
        bucket_name: Destination bucket.
        prefix: Object-name prefix placed in front of the file's base name.
        credentials_path: Service-account file used to build the client.

    Returns:
        ``(True, "gs://<bucket>/<blob name>")`` on success, ``(False, None)``
        when the upload or the ``make_public()`` call raises (the error is
        logged).
    """
    gcs_client = initiate_gcs_client(credentials_path)
    target_bucket = gcs_client.bucket(bucket_name)
    base_name = os.path.basename(file_name)
    blob_name = f"{prefix}/{base_name}"
    target_blob = target_bucket.blob(blob_name)
    try:
        target_blob.upload_from_filename(file_name)
        # Optional: make the uploaded object publicly accessible.
        target_blob.make_public()
    except Exception as err:
        logging.error(err)
        return False, None
    else:
        return True, f"gs://{bucket_name}/{blob_name}"
def create_presigned_url_gcs(bucket_name: str, object_name: str, credentials_path: str = GCS_CREDENTIALS_PATH, expiration: int = 3600):
    """Return a v4 signed GET URL for a GCS object.

    Args:
        bucket_name: Bucket that holds the object.
        object_name: Full object name inside the bucket.
        credentials_path: Service-account file used to build the client.
        expiration: Lifetime of the URL in seconds (default 3600).

    Returns:
        The signed URL, or ``None`` when signing raises (the error is logged).
    """
    gcs_client = initiate_gcs_client(credentials_path)
    target_blob = gcs_client.bucket(bucket_name).blob(object_name)
    signing_options = {
        "version": "v4",
        "expiration": timedelta(seconds=expiration),
        "method": "GET",
    }
    try:
        return target_blob.generate_signed_url(**signing_options)
    except Exception as err:
        logging.error(err)
        return None
def download_file_from_gcs(bucket_name: str, object_key: str, download_path: str, credentials_path: str = GCS_CREDENTIALS_PATH):
    """Download one GCS object to a local path.

    Args:
        bucket_name: Bucket that holds the object.
        object_key: Full object name inside the bucket.
        download_path: Local file path to write the object to.
        credentials_path: Service-account file used to build the client.

    Returns:
        ``(True, download_path)`` on success, ``(False, None)`` when the
        download raises (the error is logged).
    """
    gcs_client = initiate_gcs_client(credentials_path)
    source_blob = gcs_client.bucket(bucket_name).blob(object_key)
    try:
        source_blob.download_to_filename(download_path)
    except Exception as err:
        logging.error(err)
        return False, None
    else:
        return True, download_path
../gcs-service-account.json
gcp-ml-300114
meditwin-ecmwf
# Upload every discovered COG to the GCS bucket and print a signed download
# URL for each file that uploaded successfully.
for cog_file in list_cog_files:
    file_name = os.path.basename(cog_file)
    prefix = "cog-data"
    print(f"Uploading {file_name} to {GCS_BUCKET_NAME}/{prefix}")
    # The second element is a gs:// URI, so it is named gcs_path rather than
    # reusing the s3_path name from the S3 cell.
    success, gcs_path = upload_file_gcs(cog_file, GCS_BUCKET_NAME, prefix)
    if success:
        print(f"File uploaded successfully to {gcs_path}")
        url = create_presigned_url_gcs(GCS_BUCKET_NAME, f"{prefix}/{file_name}")
        print(f"Presigned URL: {url}")
    else:
        print("File upload failed")
Uploading T34SFG_20240621T092031_TCI_10m_cog.tif to meditwin-ecmwf/cog-data
File uploaded successfully to gs://meditwin-ecmwf/cog-data/T34SFG_20240621T092031_TCI_10m_cog.tif
Presigned URL: https://storage.googleapis.com/meditwin-ecmwf/cog-data/T34SFG_20240621T092031_TCI_10m_cog.tif?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=meditwin-ecmwf-gcs%40gcp-ml-300114.iam.gserviceaccount.com%2F20250620%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250620T145232Z&X-Goog-Expires=3600&X-Goog-SignedHeaders=host&X-Goog-Signature=a7e3c17c7b3f0b1599f92299bb445762aee45790b7fb61034b7b1a63094a6e9554fa72477cde896e22f133ae989e7bb0ceb76229b13028f6b31b5805a16d40765c375d0cdc8bfce43a21c167180c88c4cc47e150fe02cc0296eb01cdb6a966d207506994e478414a07afae900533397da5a09ad73f9cf50f30558ee97ee2f57aec5e523fe49c7c0f9d711b5f7345643161fba8db902c3e4c0a3609bf8c16a21a67f09b3cb0da872d6b2f9097c67502e383f60569897766b43332cafb1d97d288415ec72dd6379e6240c3314f1d6cfc7ec52e6f5bdb1ca58d133f28a5a64ebb206c78317ae1106140f067ac74e3d791f61acf5eb9c875918bbc6f6334b3180f67
Uploading T34SFF_20220209T091131_TCI_10m_cog.tif to meditwin-ecmwf/cog-data
File uploaded successfully to gs://meditwin-ecmwf/cog-data/T34SFF_20220209T091131_TCI_10m_cog.tif
Presigned URL: https://storage.googleapis.com/meditwin-ecmwf/cog-data/T34SFF_20220209T091131_TCI_10m_cog.tif?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=meditwin-ecmwf-gcs%40gcp-ml-300114.iam.gserviceaccount.com%2F20250620%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250620T145334Z&X-Goog-Expires=3600&X-Goog-SignedHeaders=host&X-Goog-Signature=c8f75aee7ac8689f634865c9e58090786f4271cf2df20c87b43fa55f0706584b17147498a9bf4fbfbc06f89964ff6f213232fe1a8d29978034b8be7d75f4a44c0f0b57ab02a2366752e40d4885cfc0cddef09f78841c0980c47be9c00c0e827ca51efc8f22fe49a6f20a47f4fd5678e2520514360e35459d0c0a7cfd6799d420278fb589702547e402625a17402e311904af7a8faaac33a2017a0fd416a76fb97b0a5fa80ec0a6e72edd22c3813298715d2e9e17c9d6211f874d87ea43054150f99993c4de952c7ffbf91f98582274095f08efc56a1b7624837b0f819aafbb11f37e683f74f8862c43ab9e30c67175007a9a0e94078b3874d81c82aa33560f16
Uploading T34SEG_20240601T092031_TCI_10m_cog.tif to meditwin-ecmwf/cog-data
File uploaded successfully to gs://meditwin-ecmwf/cog-data/T34SEG_20240601T092031_TCI_10m_cog.tif
Presigned URL: https://storage.googleapis.com/meditwin-ecmwf/cog-data/T34SEG_20240601T092031_TCI_10m_cog.tif?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=meditwin-ecmwf-gcs%40gcp-ml-300114.iam.gserviceaccount.com%2F20250620%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250620T145502Z&X-Goog-Expires=3600&X-Goog-SignedHeaders=host&X-Goog-Signature=e8b817b0e8676d5d7d440a2351c7e006da1750ebfc473f10fcca09ea7572236f9f80e2cc198c30eec105e30e09507c8549a25309cdfffba4890b711a05063b0213aba8f39bddd5ddcc96a01adffa4933b875849d078a39fe7f6b36cc4f0594c5fd77526dd8fc69a7b307887a86deb1472c343e4947fc868bd960576b264ce1d774280d45ff28d251cf84929d465ddaf2d28eb2e3e32f7b22e9cad97141021faeeda59b9a67bdaa0b793da828e286020cd353b4763a15c2ea8e3ff531dc5bf7bdb9195439fe68620eb5622b3e11a2242346be472f6edc2ddb1148b9ad8e19a4867b113d94f42a8f9f88a6fa1287f9e57c7be9e63f26ae968f10f7b958631c914a
Uploading T34SGH_20240608T090601_TCI_10m_cog.tif to meditwin-ecmwf/cog-data
File uploaded successfully to gs://meditwin-ecmwf/cog-data/T34SGH_20240608T090601_TCI_10m_cog.tif
Presigned URL: https://storage.googleapis.com/meditwin-ecmwf/cog-data/T34SGH_20240608T090601_TCI_10m_cog.tif?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=meditwin-ecmwf-gcs%40gcp-ml-300114.iam.gserviceaccount.com%2F20250620%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250620T145618Z&X-Goog-Expires=3600&X-Goog-SignedHeaders=host&X-Goog-Signature=ed392e1541b70081305826fffae0b70bf2f505242bab63764210d49b2a2f2cd163b4aa42062b67f5f88fd7176c3787ae76fadce5ba566d691e222e722ac58c53b9babc993d95c737c758e197da643405bb483c7e5d92d935e8244143984e18c7b91be902708402e6dae9052894a539098465e5e28fe6091647d9e9817ca9198403c74212324ddfa79f8783922b3a9b299bd0a499577fa30869ff1296436ac6ee58c11859654c7d8e43bea55a5c6f829fe75c6ce50d24f5498104e9971e81c88d6ddfddc41d05e3bf918326f6a747ad6dd2e01a4c103c85383e2f0909d31a3abfe9daf09139fdaded418c3052e134b286e180199a2f1e86b8a4561dea0c0b1ef7
# Fetch one of the uploaded COGs back from GCS into ../data; the call evaluates
# to (True, download_path) on success or (False, None) when the download raises.
download_file_from_gcs(GCS_BUCKET_NAME, "cog-data/T34SGH_20240608T090601_TCI_10m_cog.tif", "../data/downloaded_cog_gcs.tif")
(True, '../data/downloaded_cog_gcs.tif')