ops_utils.azure_utils

Module for interacting with Azure.

  1"""Module for interacting with Azure."""
  2import os
  3import logging
  4import base64
  5import re
  6from pathlib import Path
  7from datetime import datetime, timezone
  8from urllib.parse import unquote
  9
 10
 11class AzureBlobDetails:
 12    """Class to interact with with Azure Blobs."""
 13
 14    def __init__(self, account_url: str, sas_token: str, container_name: str):
 15        """Initialize the AzureBlobDetails class.
 16
 17        **Args:**
 18        - account_url (str): The Azure account URL
 19        - sas_token (str): The SAS token
 20        - container_name (str): The container name
 21        """
 22        from azure.storage.blob import BlobServiceClient
 23        self.account_url = account_url
 24        """@private"""
 25        self.sas_token = sas_token
 26        """@private"""
 27        self.container_name = container_name
 28        """@private"""
 29        self.blob_service_client = BlobServiceClient(
 30            account_url=self.account_url, credential=self.sas_token)
 31        """@private"""
 32
 33    def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
 34        """
 35        Get details about all Azure blobs within a container.
 36
 37        **Args**:
 38        - max_per_page (int): The maximum number of blobs to return per page
 39        """
 40        container_client = self.blob_service_client.get_container_client(
 41            self.container_name)
 42        details = []
 43
 44        blob_list = container_client.list_blobs(results_per_page=max_per_page)
 45        page = blob_list.by_page()
 46
 47        page_count = 0
 48        for blob_page in page:
 49            page_count += 1
 50            logging.info(
 51                f"Getting page {page_count} of max {max_per_page} blobs")
 52            for blob in blob_page:
 53                blob_client = container_client.get_blob_client(blob)  # type: ignore[arg-type]
 54                props = blob_client.get_blob_properties()
 55                if not blob.name.endswith('/'):
 56                    md5_hash = base64.b64encode(props.content_settings.content_md5).decode(
 57                        'utf-8') if props.content_settings.content_md5 else ""
 58                    full_path = blob_client.url.replace(
 59                        f'?{self.sas_token}', '')
 60                    rel_path = full_path.replace(f"{self.account_url}/{self.container_name}/", '')
 61                    details.append(
 62                        {
 63                            'file_name': blob.name,
 64                            'file_path': full_path,
 65                            'relative_path': rel_path,
 66                            'content_type': props.content_settings.content_type,
 67                            'file_extension': os.path.splitext(blob.name)[1],
 68                            'size_in_bytes': props.size,
 69                            'md5_hash': md5_hash
 70                        }
 71                    )
 72        return details
 73
 74    def download_blob(self, blob_name: str, dl_path: Path) -> None:
 75        """
 76        Download an Azure blob object.
 77
 78        **Args:**
 79        - blob_name (str): The name of the blob to download
 80        - dl_path (Path): The path to download the blob to
 81        """
 82        blob_client = self.blob_service_client.get_blob_client(blob=blob_name, container=self.container_name)
 83        dl_path.parent.mkdir(parents=True, exist_ok=True)
 84        with dl_path.open(mode='wb') as file:
 85            blob_data = blob_client.download_blob()
 86            file.write(blob_data.readall())
 87
 88
 89class SasTokenUtil:
 90    """
 91    Class to obtain and manage Az SAS tokens.
 92
 93    @private
 94    """
 95
 96    def __init__(self, token: str):
 97        """Initialize the SasTokenUtil class."""
 98        self.token = token
 99        self.expiry_datetime = self._set_token_expiry()
100
101    def _set_token_expiry(self) -> datetime:
102        sas_expiry_time_pattern = re.compile(r"se.+?(?=\&sp)")
103        expiry_time_str = sas_expiry_time_pattern.search(self.token)
104        time_str = unquote(expiry_time_str.group()).replace("se=", "").replace("&sr=c", "")  # type: ignore[union-attr]
105        return datetime.fromisoformat(time_str)
106
107    def seconds_until_token_expires(self) -> int:
108        """Get time until token expires."""
109        current_time = datetime.now(timezone.utc)
110        time_delta = self.expiry_datetime - current_time
111        return time_delta.seconds
class AzureBlobDetails:
class AzureBlobDetails:
    """Class to interact with Azure Blobs."""

    def __init__(self, account_url: str, sas_token: str, container_name: str):
        """Initialize the AzureBlobDetails class.

        **Args:**
        - account_url (str): The Azure account URL
        - sas_token (str): The SAS token
        - container_name (str): The container name
        """
        # Function-scope import: the Azure SDK is only needed once a client is constructed.
        from azure.storage.blob import BlobServiceClient
        self.account_url = account_url
        """@private"""
        self.sas_token = sas_token
        """@private"""
        self.container_name = container_name
        """@private"""
        self.blob_service_client = BlobServiceClient(
            account_url=self.account_url, credential=self.sas_token)
        """@private"""

    def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
        """
        Get details about all Azure blobs within a container.

        Blobs whose name ends with `/` are skipped before any per-blob request is made.

        **Args:**
        - max_per_page (int): The maximum number of blobs to return per page

        **Returns:**
        - list[dict]: One dictionary per blob with the keys `file_name`, `file_path`,
          `relative_path`, `content_type`, `file_extension`, `size_in_bytes` and `md5_hash`
        """
        container_client = self.blob_service_client.get_container_client(
            self.container_name)
        # Tolerate an account URL that was supplied with a trailing slash.
        url_prefix = f"{self.account_url.rstrip('/')}/{self.container_name}/"
        details = []

        pages = container_client.list_blobs(results_per_page=max_per_page).by_page()
        for page_count, blob_page in enumerate(pages, start=1):
            logging.info(
                f"Getting page {page_count} of max {max_per_page} blobs")
            for blob in blob_page:
                # Filter first so no properties request is spent on a blob that is dropped anyway.
                if blob.name.endswith('/'):
                    continue
                blob_client = container_client.get_blob_client(blob)  # type: ignore[arg-type]
                props = blob_client.get_blob_properties()
                content_md5 = props.content_settings.content_md5
                md5_hash = base64.b64encode(content_md5).decode('utf-8') if content_md5 else ""
                # Drop the whole query string so the SAS token never ends up in the
                # reported path, whether or not the token was passed with a leading '?'.
                full_path = blob_client.url.split('?', 1)[0]
                details.append(
                    {
                        'file_name': blob.name,
                        'file_path': full_path,
                        'relative_path': full_path.removeprefix(url_prefix),
                        'content_type': props.content_settings.content_type,
                        'file_extension': os.path.splitext(blob.name)[1],
                        'size_in_bytes': props.size,
                        'md5_hash': md5_hash
                    }
                )
        return details

    def download_blob(self, blob_name: str, dl_path: Path) -> None:
        """
        Download an Azure blob object.

        **Args:**
        - blob_name (str): The name of the blob to download
        - dl_path (Path): The path to download the blob to
        """
        blob_client = self.blob_service_client.get_blob_client(blob=blob_name, container=self.container_name)
        # Start the download before touching the filesystem so that an error raised by
        # `download_blob()` does not leave an empty file behind.
        blob_data = blob_client.download_blob()
        dl_path.parent.mkdir(parents=True, exist_ok=True)
        with dl_path.open(mode='wb') as file:
            # Write straight into the file instead of buffering the whole blob via readall().
            blob_data.readinto(file)

Class to interact with Azure Blobs.

AzureBlobDetails(account_url: str, sas_token: str, container_name: str)
15    def __init__(self, account_url: str, sas_token: str, container_name: str):
16        """Initialize the AzureBlobDetails class.
17
18        **Args:**
19        - account_url (str): The Azure account URL
20        - sas_token (str): The SAS token
21        - container_name (str): The container name
22        """
23        from azure.storage.blob import BlobServiceClient
24        self.account_url = account_url
25        """@private"""
26        self.sas_token = sas_token
27        """@private"""
28        self.container_name = container_name
29        """@private"""
30        self.blob_service_client = BlobServiceClient(
31            account_url=self.account_url, credential=self.sas_token)
32        """@private"""

Initialize the AzureBlobDetails class.

Args:

  • account_url (str): The Azure account URL
  • sas_token (str): The SAS token
  • container_name (str): The container name
def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
34    def get_blob_details(self, max_per_page: int = 500) -> list[dict]:
35        """
36        Get details about all Azure blobs within a container.
37
38        **Args**:
39        - max_per_page (int): The maximum number of blobs to return per page
40        """
41        container_client = self.blob_service_client.get_container_client(
42            self.container_name)
43        details = []
44
45        blob_list = container_client.list_blobs(results_per_page=max_per_page)
46        page = blob_list.by_page()
47
48        page_count = 0
49        for blob_page in page:
50            page_count += 1
51            logging.info(
52                f"Getting page {page_count} of max {max_per_page} blobs")
53            for blob in blob_page:
54                blob_client = container_client.get_blob_client(blob)  # type: ignore[arg-type]
55                props = blob_client.get_blob_properties()
56                if not blob.name.endswith('/'):
57                    md5_hash = base64.b64encode(props.content_settings.content_md5).decode(
58                        'utf-8') if props.content_settings.content_md5 else ""
59                    full_path = blob_client.url.replace(
60                        f'?{self.sas_token}', '')
61                    rel_path = full_path.replace(f"{self.account_url}/{self.container_name}/", '')
62                    details.append(
63                        {
64                            'file_name': blob.name,
65                            'file_path': full_path,
66                            'relative_path': rel_path,
67                            'content_type': props.content_settings.content_type,
68                            'file_extension': os.path.splitext(blob.name)[1],
69                            'size_in_bytes': props.size,
70                            'md5_hash': md5_hash
71                        }
72                    )
73        return details

Get details about all Azure blobs within a container.

Args:

  • max_per_page (int): The maximum number of blobs to return per page
def download_blob(self, blob_name: str, dl_path: pathlib._local.Path) -> None:
75    def download_blob(self, blob_name: str, dl_path: Path) -> None:
76        """
77        Download an Azure blob object.
78
79        **Args:**
80        - blob_name (str): The name of the blob to download
81        - dl_path (Path): The path to download the blob to
82        """
83        blob_client = self.blob_service_client.get_blob_client(blob=blob_name, container=self.container_name)
84        dl_path.parent.mkdir(parents=True, exist_ok=True)
85        with dl_path.open(mode='wb') as file:
86            blob_data = blob_client.download_blob()
87            file.write(blob_data.readall())

Download an Azure blob object.

Args:

  • blob_name (str): The name of the blob to download
  • dl_path (Path): The path to download the blob to