ops_utils.azure_utils
Module for interacting with Azure.
"""Module for interacting with Azure."""
import os
import logging
import base64
import re
from pathlib import Path
from datetime import datetime, timezone
from urllib.parse import unquote


class AzureBlobDetails:
    """Class to interact with Azure Blobs."""

    def __init__(self, account_url: str, sas_token: str, container_name: str):
        """Initialize the AzureBlobDetails class.

        **Args:**
        - account_url (str): The Azure account URL
        - sas_token (str): The SAS token
        - container_name (str): The container name
        """
        from azure.storage.blob import BlobServiceClient
        self.account_url = account_url
        """@private"""
        self.sas_token = sas_token
        """@private"""
        self.container_name = container_name
        """@private"""
        self.blob_service_client = BlobServiceClient(
            account_url=self.account_url, credential=self.sas_token)
        """@private"""

    def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
        """
        Get details about all Azure blobs within a container.

        Blobs whose name ends with `/` are skipped; no properties are requested for them.

        **Args**:
        - max_per_page (int): The maximum number of blobs to return per page

        **Returns:**
        - list[dict]: One dictionary per blob with the keys `file_name`, `file_path`,
          `relative_path`, `content_type`, `file_extension`, `size_in_bytes` and `md5_hash`
        """
        container_client = self.blob_service_client.get_container_client(
            self.container_name)
        # Strip the SAS query string from returned paths so the token is not exposed,
        # also when the token was supplied with a leading "?".
        sas_suffix = f"?{self.sas_token.lstrip('?')}"
        # Tolerate an account URL that was supplied with a trailing slash.
        container_prefix = f"{self.account_url.rstrip('/')}/{self.container_name}/"
        details = []

        pages = container_client.list_blobs(results_per_page=max_per_page).by_page()
        for page_count, blob_page in enumerate(pages, start=1):
            logging.info(
                "Getting page %s of max %s blobs", page_count, max_per_page)
            for blob in blob_page:
                if blob.name.endswith('/'):
                    # Skipped before any client call: avoids one request per placeholder.
                    continue
                blob_client = container_client.get_blob_client(blob)  # type: ignore[arg-type]
                props = blob_client.get_blob_properties()
                content_md5 = props.content_settings.content_md5
                md5_hash = base64.b64encode(content_md5).decode('utf-8') if content_md5 else ""
                full_path = blob_client.url.replace(sas_suffix, '')
                rel_path = full_path.removeprefix(container_prefix)
                details.append(
                    {
                        'file_name': blob.name,
                        'file_path': full_path,
                        'relative_path': rel_path,
                        'content_type': props.content_settings.content_type,
                        'file_extension': os.path.splitext(blob.name)[1],
                        'size_in_bytes': props.size,
                        'md5_hash': md5_hash
                    }
                )
        return details

    def download_blob(self, blob_name: str, dl_path: Path) -> None:
        """
        Download an Azure blob object.

        Parent directories of `dl_path` are created when missing.

        **Args:**
        - blob_name (str): The name of the blob to download
        - dl_path (Path): The path to download the blob to
        """
        blob_client = self.blob_service_client.get_blob_client(blob=blob_name, container=self.container_name)
        # Start the download before touching the filesystem so that a failure to
        # start it does not leave an empty file behind.
        blob_data = blob_client.download_blob()
        dl_path.parent.mkdir(parents=True, exist_ok=True)
        with dl_path.open(mode='wb') as file:
            # Write straight into the file instead of buffering the whole blob in memory.
            blob_data.readinto(file)


class SasTokenUtil:
    """
    Class to obtain and manage Az SAS tokens.

    @private
    """

    def __init__(self, token: str):
        """Initialize the SasTokenUtil class."""
        self.token = token
        self.expiry_datetime = self._set_token_expiry()

    def _set_token_expiry(self) -> datetime:
        """Parse the `se` parameter of the token into a timezone-aware datetime.

        The parameter is located by name, so its position within the token does not matter.

        **Raises:**
        - ValueError: If the token has no `se` parameter or its value is not an ISO 8601 timestamp
        """
        match = re.search(r"(?:^|[?&])se=([^&]*)", self.token)
        if match is None:
            raise ValueError("SAS token does not contain an expiry ('se') parameter")
        time_str = unquote(match.group(1))
        # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on.
        if time_str.endswith("Z"):
            time_str = f"{time_str[:-1]}+00:00"
        expiry = datetime.fromisoformat(time_str)
        if expiry.tzinfo is None:
            # NOTE(review): a value without an offset is assumed to be UTC so that it
            # can be compared with an aware "now" - confirm against the SAS specification.
            expiry = expiry.replace(tzinfo=timezone.utc)
        return expiry

    def seconds_until_token_expires(self) -> int:
        """Get time until token expires.

        **Returns:**
        - int: Whole seconds until expiry; negative when the token has already expired
        """
        current_time = datetime.now(timezone.utc)
        time_delta = self.expiry_datetime - current_time
        # total_seconds() includes whole days and the sign; `.seconds` holds neither.
        return int(time_delta.total_seconds())
class
AzureBlobDetails:
class AzureBlobDetails:
    """Class to interact with Azure Blobs."""

    def __init__(self, account_url: str, sas_token: str, container_name: str):
        """Initialize the AzureBlobDetails class.

        **Args:**
        - account_url (str): The Azure account URL
        - sas_token (str): The SAS token
        - container_name (str): The container name
        """
        from azure.storage.blob import BlobServiceClient
        self.account_url = account_url
        """@private"""
        self.sas_token = sas_token
        """@private"""
        self.container_name = container_name
        """@private"""
        self.blob_service_client = BlobServiceClient(
            account_url=self.account_url, credential=self.sas_token)
        """@private"""

    def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
        """
        Get details about all Azure blobs within a container.

        Blobs whose name ends with `/` are skipped; no properties are requested for them.

        **Args**:
        - max_per_page (int): The maximum number of blobs to return per page

        **Returns:**
        - list[dict]: One dictionary per blob with the keys `file_name`, `file_path`,
          `relative_path`, `content_type`, `file_extension`, `size_in_bytes` and `md5_hash`
        """
        container_client = self.blob_service_client.get_container_client(
            self.container_name)
        # Strip the SAS query string from returned paths so the token is not exposed,
        # also when the token was supplied with a leading "?".
        sas_suffix = f"?{self.sas_token.lstrip('?')}"
        # Tolerate an account URL that was supplied with a trailing slash.
        container_prefix = f"{self.account_url.rstrip('/')}/{self.container_name}/"
        details = []

        pages = container_client.list_blobs(results_per_page=max_per_page).by_page()
        for page_count, blob_page in enumerate(pages, start=1):
            logging.info(
                "Getting page %s of max %s blobs", page_count, max_per_page)
            for blob in blob_page:
                if blob.name.endswith('/'):
                    # Skipped before any client call: avoids one request per placeholder.
                    continue
                blob_client = container_client.get_blob_client(blob)  # type: ignore[arg-type]
                props = blob_client.get_blob_properties()
                content_md5 = props.content_settings.content_md5
                md5_hash = base64.b64encode(content_md5).decode('utf-8') if content_md5 else ""
                full_path = blob_client.url.replace(sas_suffix, '')
                rel_path = full_path.removeprefix(container_prefix)
                details.append(
                    {
                        'file_name': blob.name,
                        'file_path': full_path,
                        'relative_path': rel_path,
                        'content_type': props.content_settings.content_type,
                        'file_extension': os.path.splitext(blob.name)[1],
                        'size_in_bytes': props.size,
                        'md5_hash': md5_hash
                    }
                )
        return details

    def download_blob(self, blob_name: str, dl_path: Path) -> None:
        """
        Download an Azure blob object.

        Parent directories of `dl_path` are created when missing.

        **Args:**
        - blob_name (str): The name of the blob to download
        - dl_path (Path): The path to download the blob to
        """
        blob_client = self.blob_service_client.get_blob_client(blob=blob_name, container=self.container_name)
        # Start the download before touching the filesystem so that a failure to
        # start it does not leave an empty file behind.
        blob_data = blob_client.download_blob()
        dl_path.parent.mkdir(parents=True, exist_ok=True)
        with dl_path.open(mode='wb') as file:
            # Write straight into the file instead of buffering the whole blob in memory.
            blob_data.readinto(file)
Class to interact with Azure Blobs.
AzureBlobDetails(account_url: str, sas_token: str, container_name: str)
def __init__(self, account_url: str, sas_token: str, container_name: str):
    """Set up a blob service client for the given account and remember the container.

    **Args:**
    - account_url (str): The Azure account URL
    - sas_token (str): The SAS token, passed to the client as its credential
    - container_name (str): The container name
    """
    # Imported here rather than at module level, as in the original.
    from azure.storage.blob import BlobServiceClient

    self.account_url = account_url
    """@private"""
    self.sas_token = sas_token
    """@private"""
    self.container_name = container_name
    """@private"""
    service_client = BlobServiceClient(account_url=account_url, credential=sas_token)
    self.blob_service_client = service_client
    """@private"""
Initialize the AzureBlobDetails class.
Args:
- account_url (str): The Azure account URL
- sas_token (str): The SAS token
- container_name (str): The container name
def
get_blob_details(self, max_per_page: int = 500) -> list[dict]:
def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
    """
    Get details about all Azure blobs within a container.

    Blobs whose name ends with `/` are skipped; no properties are requested for them.

    **Args**:
    - max_per_page (int): The maximum number of blobs to return per page

    **Returns:**
    - list[dict]: One dictionary per blob with the keys `file_name`, `file_path`,
      `relative_path`, `content_type`, `file_extension`, `size_in_bytes` and `md5_hash`
    """
    container_client = self.blob_service_client.get_container_client(
        self.container_name)
    # Strip the SAS query string from returned paths so the token is not exposed,
    # also when the token was supplied with a leading "?".
    sas_suffix = f"?{self.sas_token.lstrip('?')}"
    # Tolerate an account URL that was supplied with a trailing slash.
    container_prefix = f"{self.account_url.rstrip('/')}/{self.container_name}/"
    details = []

    pages = container_client.list_blobs(results_per_page=max_per_page).by_page()
    for page_count, blob_page in enumerate(pages, start=1):
        logging.info(
            "Getting page %s of max %s blobs", page_count, max_per_page)
        for blob in blob_page:
            if blob.name.endswith('/'):
                # Skipped before any client call: avoids one request per placeholder.
                continue
            blob_client = container_client.get_blob_client(blob)  # type: ignore[arg-type]
            props = blob_client.get_blob_properties()
            content_md5 = props.content_settings.content_md5
            md5_hash = base64.b64encode(content_md5).decode('utf-8') if content_md5 else ""
            full_path = blob_client.url.replace(sas_suffix, '')
            rel_path = full_path.removeprefix(container_prefix)
            details.append(
                {
                    'file_name': blob.name,
                    'file_path': full_path,
                    'relative_path': rel_path,
                    'content_type': props.content_settings.content_type,
                    'file_extension': os.path.splitext(blob.name)[1],
                    'size_in_bytes': props.size,
                    'md5_hash': md5_hash
                }
            )
    return details
Get details about all Azure blobs within a container.
Args:
- max_per_page (int): The maximum number of blobs to return per page
def
download_blob(self, blob_name: str, dl_path: pathlib.Path) -> None:
def download_blob(self, blob_name: str, dl_path: Path) -> None:
    """
    Download an Azure blob object.

    Parent directories of `dl_path` are created when missing.

    **Args:**
    - blob_name (str): The name of the blob to download
    - dl_path (Path): The path to download the blob to
    """
    blob_client = self.blob_service_client.get_blob_client(blob=blob_name, container=self.container_name)
    # Start the download before touching the filesystem so that a failure to
    # start it does not leave an empty file behind.
    blob_data = blob_client.download_blob()
    dl_path.parent.mkdir(parents=True, exist_ok=True)
    with dl_path.open(mode='wb') as file:
        # Write straight into the file instead of buffering the whole blob in memory.
        blob_data.readinto(file)
Download an Azure blob object.
Args:
- blob_name (str): The name of the blob to download
- dl_path (Path): The path to download the blob to