ops_utils.csv_util

Module for CSV file operations.

  1"""Module for CSV file operations."""
  2import csv
  3import logging
  4from typing import Optional, Sequence
  5
  6
  7class Csv:
  8    """Class for CSV file operations."""
  9
 10    def __init__(self, file_path: str, delimiter: str = "\t", verbose: bool = True) -> None:
 11        r"""
 12        Initialize the Csv class.
 13
 14        **Args:**
 15        - file_path (str): The path to the tabular file.
 16        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\\t` (tab-delimited).
 17        - verbose (bool, optional): Whether to log the creation of the file. Defaults to `True`.
 18        """
 19        self.file_path = file_path
 20        """@private"""
 21        self.delimiter = delimiter
 22        """@private"""
 23        self.verbose = verbose
 24        """@private"""
 25
 26    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
 27        """
 28        Create a TSV file from a list of dictionaries.
 29
 30        **Args:**
 31        - list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
 32        - header_list (list[str], optional): The list of headers to use in the TSV file.
 33                If provided, output columns will be in same order as list. Defaults to None.
 34
 35        **Returns:**
 36        - str: The path to the created TSV file.
 37        """
 38        # Create one flat unique list by doing list comprehension where it loops
 39        # through twice to make it flat and transform to set and back to list
 40        # to make it unique
 41        if not header_list:
 42            header_list = sorted(
 43                list(
 44                    set(
 45                        [
 46                            header_list
 47                            for d in list_of_dicts
 48                            for header_list in d.keys()
 49                        ]
 50                    )
 51                )
 52            )
 53        if self.verbose:
 54            logging.info(f'Creating {self.file_path}')
 55        with open(self.file_path, 'w', newline='') as f:
 56            writer = csv.DictWriter(
 57                f, fieldnames=header_list, delimiter='\t', quotechar="'", extrasaction='ignore')
 58            writer.writeheader()
 59            for d in list_of_dicts:
 60                writer.writerow(d)
 61        return self.file_path
 62
 63    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
 64        """
 65        Create a TSV file from a list of lists.
 66
 67        **Args:**
 68        - list_of_lists (list[list]): The list of lists to write to the TSV file.
 69
 70        **Returns:**
 71        - str: The path to the created TSV file.
 72        """
 73        if self.verbose:
 74            logging.info(f'Creating {self.file_path}')
 75        with open(self.file_path, 'w') as f:
 76            for list_of_data in list_of_lists:
 77                # Make sure all entries are strings
 78                str_only_list = [str(entry) for entry in list_of_data]
 79                f.write(self.delimiter.join(str_only_list) + '\n')
 80        return self.file_path
 81
 82    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
 83        """
 84        Create a list of dictionaries from a TSV file with no headers.
 85
 86        **Args:**
 87        - headers_list (list[str]): The list of headers to use for the TSV file.
 88
 89        **Returns:**
 90        - list[dict]: The list of dictionaries created from the TSV file.
 91        """
 92        with open(self.file_path, 'r') as f:
 93            reader = csv.DictReader(
 94                f, delimiter=self.delimiter, fieldnames=headers_list)
 95            return [row for row in reader]
 96
 97    def get_header_order_from_tsv(self) -> Sequence[str]:
 98        """
 99        Get the header order from a TSV file.
100
101        **Returns:**
102        - list[str]: The list of headers in the TSV file.
103        """
104        with open(self.file_path, 'r') as f:
105            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
106            return reader.fieldnames  # type: ignore[return-value]
107
108    def create_list_of_dicts_from_tsv(
109            self,
110            expected_headers: Optional[list[str]] = None,
111            allow_extra_headers: bool = False
112    ) -> list[dict]:
113        """
114        Create a list of dictionaries from a TSV file.
115
116        **Args:**
117        - expected_headers (list[str], optional): The list of expected headers. If provided,
118                will check that all headers are present in the TSV file. Defaults to None.
119        - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
120                Only used if `expected_headers` is provided. Defaults to False.
121
122        **Returns:**
123        - list[dict]: The list of dictionaries created from the TSV file.
124
125        **Raises:**
126        - ValueError: If the expected headers are not found in the TSV file.
127        """
128        with open(self.file_path) as f:
129            dict_reader = csv.DictReader(
130                f, delimiter=self.delimiter, skipinitialspace=True)
131            if expected_headers:
132                match = True
133                tsv_headers = dict_reader.fieldnames
134                extra_headers = set(tsv_headers) - set(expected_headers)  # type: ignore[arg-type]
135                missing_headers = set(expected_headers) - set(tsv_headers)  # type: ignore[arg-type]
136                if extra_headers:
137                    extra_string = ','.join(extra_headers)
138                    logging.warning(
139                        f"Extra headers found in tsv: {extra_string}")
140                    if not allow_extra_headers:
141                        match = False
142                if missing_headers:
143                    missing_string = ','.join(missing_headers)
144                    logging.error(
145                        f"Missing expected headers: {missing_string}")
146                    match = False
147                if not match:
148                    raise ValueError(
149                        f"Expected headers not in {self.file_path}")
150            return [
151                {
152                    k: v
153                    for k, v in row.items()
154                }
155                for row in dict_reader
156            ]
class Csv:
  8class Csv:
  9    """Class for CSV file operations."""
 10
 11    def __init__(self, file_path: str, delimiter: str = "\t", verbose: bool = True) -> None:
 12        r"""
 13        Initialize the Csv class.
 14
 15        **Args:**
 16        - file_path (str): The path to the tabular file.
 17        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\\t` (tab-delimited).
 18        - verbose (bool, optional): Whether to log the creation of the file. Defaults to `True`.
 19        """
 20        self.file_path = file_path
 21        """@private"""
 22        self.delimiter = delimiter
 23        """@private"""
 24        self.verbose = verbose
 25        """@private"""
 26
 27    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
 28        """
 29        Create a TSV file from a list of dictionaries.
 30
 31        **Args:**
 32        - list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
 33        - header_list (list[str], optional): The list of headers to use in the TSV file.
 34                If provided, output columns will be in same order as list. Defaults to None.
 35
 36        **Returns:**
 37        - str: The path to the created TSV file.
 38        """
 39        # Create one flat unique list by doing list comprehension where it loops
 40        # through twice to make it flat and transform to set and back to list
 41        # to make it unique
 42        if not header_list:
 43            header_list = sorted(
 44                list(
 45                    set(
 46                        [
 47                            header_list
 48                            for d in list_of_dicts
 49                            for header_list in d.keys()
 50                        ]
 51                    )
 52                )
 53            )
 54        if self.verbose:
 55            logging.info(f'Creating {self.file_path}')
 56        with open(self.file_path, 'w', newline='') as f:
 57            writer = csv.DictWriter(
 58                f, fieldnames=header_list, delimiter='\t', quotechar="'", extrasaction='ignore')
 59            writer.writeheader()
 60            for d in list_of_dicts:
 61                writer.writerow(d)
 62        return self.file_path
 63
 64    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
 65        """
 66        Create a TSV file from a list of lists.
 67
 68        **Args:**
 69        - list_of_lists (list[list]): The list of lists to write to the TSV file.
 70
 71        **Returns:**
 72        - str: The path to the created TSV file.
 73        """
 74        if self.verbose:
 75            logging.info(f'Creating {self.file_path}')
 76        with open(self.file_path, 'w') as f:
 77            for list_of_data in list_of_lists:
 78                # Make sure all entries are strings
 79                str_only_list = [str(entry) for entry in list_of_data]
 80                f.write(self.delimiter.join(str_only_list) + '\n')
 81        return self.file_path
 82
 83    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
 84        """
 85        Create a list of dictionaries from a TSV file with no headers.
 86
 87        **Args:**
 88        - headers_list (list[str]): The list of headers to use for the TSV file.
 89
 90        **Returns:**
 91        - list[dict]: The list of dictionaries created from the TSV file.
 92        """
 93        with open(self.file_path, 'r') as f:
 94            reader = csv.DictReader(
 95                f, delimiter=self.delimiter, fieldnames=headers_list)
 96            return [row for row in reader]
 97
 98    def get_header_order_from_tsv(self) -> Sequence[str]:
 99        """
100        Get the header order from a TSV file.
101
102        **Returns:**
103        - list[str]: The list of headers in the TSV file.
104        """
105        with open(self.file_path, 'r') as f:
106            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
107            return reader.fieldnames  # type: ignore[return-value]
108
109    def create_list_of_dicts_from_tsv(
110            self,
111            expected_headers: Optional[list[str]] = None,
112            allow_extra_headers: bool = False
113    ) -> list[dict]:
114        """
115        Create a list of dictionaries from a TSV file.
116
117        **Args:**
118        - expected_headers (list[str], optional): The list of expected headers. If provided,
119                will check that all headers are present in the TSV file. Defaults to None.
120        - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
121                Only used if `expected_headers` is provided. Defaults to False.
122
123        **Returns:**
124        - list[dict]: The list of dictionaries created from the TSV file.
125
126        **Raises:**
127        - ValueError: If the expected headers are not found in the TSV file.
128        """
129        with open(self.file_path) as f:
130            dict_reader = csv.DictReader(
131                f, delimiter=self.delimiter, skipinitialspace=True)
132            if expected_headers:
133                match = True
134                tsv_headers = dict_reader.fieldnames
135                extra_headers = set(tsv_headers) - set(expected_headers)  # type: ignore[arg-type]
136                missing_headers = set(expected_headers) - set(tsv_headers)  # type: ignore[arg-type]
137                if extra_headers:
138                    extra_string = ','.join(extra_headers)
139                    logging.warning(
140                        f"Extra headers found in tsv: {extra_string}")
141                    if not allow_extra_headers:
142                        match = False
143                if missing_headers:
144                    missing_string = ','.join(missing_headers)
145                    logging.error(
146                        f"Missing expected headers: {missing_string}")
147                    match = False
148                if not match:
149                    raise ValueError(
150                        f"Expected headers not in {self.file_path}")
151            return [
152                {
153                    k: v
154                    for k, v in row.items()
155                }
156                for row in dict_reader
157            ]

Class for CSV file operations.

Csv(file_path: str, delimiter: str = '\t', verbose: bool = True)
    def __init__(self, file_path: str, delimiter: str = "\t", verbose: bool = True) -> None:
        r"""
        Initialize the Csv class.

        Stores the arguments on the instance; no file is opened here.

        **Args:**
        - file_path (str): The path to the tabular file.
        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
        - verbose (bool, optional): Whether to log the creation of the file. Defaults to `True`.
        """
        self.file_path = file_path
        """@private"""
        self.delimiter = delimiter
        """@private"""
        self.verbose = verbose
        """@private"""

Initialize the Csv class.

Args:

  • file_path (str): The path to the tabular file.
  • delimiter (str, optional): The delimiter to use in the tabular file. Defaults to \t (tab-delimited).
  • verbose (bool, optional): Whether to log the creation of the file. Defaults to True.
def create_tsv_from_list_of_dicts( self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
27    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
28        """
29        Create a TSV file from a list of dictionaries.
30
31        **Args:**
32        - list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
33        - header_list (list[str], optional): The list of headers to use in the TSV file.
34                If provided, output columns will be in same order as list. Defaults to None.
35
36        **Returns:**
37        - str: The path to the created TSV file.
38        """
39        # Create one flat unique list by doing list comprehension where it loops
40        # through twice to make it flat and transform to set and back to list
41        # to make it unique
42        if not header_list:
43            header_list = sorted(
44                list(
45                    set(
46                        [
47                            header_list
48                            for d in list_of_dicts
49                            for header_list in d.keys()
50                        ]
51                    )
52                )
53            )
54        if self.verbose:
55            logging.info(f'Creating {self.file_path}')
56        with open(self.file_path, 'w', newline='') as f:
57            writer = csv.DictWriter(
58                f, fieldnames=header_list, delimiter='\t', quotechar="'", extrasaction='ignore')
59            writer.writeheader()
60            for d in list_of_dicts:
61                writer.writerow(d)
62        return self.file_path

Create a TSV file from a list of dictionaries.

Args:

  • list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
  • header_list (list[str], optional): The list of headers to use in the TSV file. If provided, output columns will be in same order as list. Defaults to None.

Returns:

  • str: The path to the created TSV file.
def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
64    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
65        """
66        Create a TSV file from a list of lists.
67
68        **Args:**
69        - list_of_lists (list[list]): The list of lists to write to the TSV file.
70
71        **Returns:**
72        - str: The path to the created TSV file.
73        """
74        if self.verbose:
75            logging.info(f'Creating {self.file_path}')
76        with open(self.file_path, 'w') as f:
77            for list_of_data in list_of_lists:
78                # Make sure all entries are strings
79                str_only_list = [str(entry) for entry in list_of_data]
80                f.write(self.delimiter.join(str_only_list) + '\n')
81        return self.file_path

Create a TSV file from a list of lists.

Args:

  • list_of_lists (list[list]): The list of lists to write to the TSV file.

Returns:

  • str: The path to the created TSV file.
def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
83    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
84        """
85        Create a list of dictionaries from a TSV file with no headers.
86
87        **Args:**
88        - headers_list (list[str]): The list of headers to use for the TSV file.
89
90        **Returns:**
91        - list[dict]: The list of dictionaries created from the TSV file.
92        """
93        with open(self.file_path, 'r') as f:
94            reader = csv.DictReader(
95                f, delimiter=self.delimiter, fieldnames=headers_list)
96            return [row for row in reader]

Create a list of dictionaries from a TSV file with no headers.

Args:

  • headers_list (list[str]): The list of headers to use for the TSV file.

Returns:

  • list[dict]: The list of dictionaries created from the TSV file.
def get_header_order_from_tsv(self) -> Sequence[str]:
 98    def get_header_order_from_tsv(self) -> Sequence[str]:
 99        """
100        Get the header order from a TSV file.
101
102        **Returns:**
103        - list[str]: The list of headers in the TSV file.
104        """
105        with open(self.file_path, 'r') as f:
106            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
107            return reader.fieldnames  # type: ignore[return-value]

Get the header order from a TSV file.

Returns:

  • list[str]: The list of headers in the TSV file.
def create_list_of_dicts_from_tsv( self, expected_headers: Optional[list[str]] = None, allow_extra_headers: bool = False) -> list[dict]:
109    def create_list_of_dicts_from_tsv(
110            self,
111            expected_headers: Optional[list[str]] = None,
112            allow_extra_headers: bool = False
113    ) -> list[dict]:
114        """
115        Create a list of dictionaries from a TSV file.
116
117        **Args:**
118        - expected_headers (list[str], optional): The list of expected headers. If provided,
119                will check that all headers are present in the TSV file. Defaults to None.
120        - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
121                Only used if `expected_headers` is provided. Defaults to False.
122
123        **Returns:**
124        - list[dict]: The list of dictionaries created from the TSV file.
125
126        **Raises:**
127        - ValueError: If the expected headers are not found in the TSV file.
128        """
129        with open(self.file_path) as f:
130            dict_reader = csv.DictReader(
131                f, delimiter=self.delimiter, skipinitialspace=True)
132            if expected_headers:
133                match = True
134                tsv_headers = dict_reader.fieldnames
135                extra_headers = set(tsv_headers) - set(expected_headers)  # type: ignore[arg-type]
136                missing_headers = set(expected_headers) - set(tsv_headers)  # type: ignore[arg-type]
137                if extra_headers:
138                    extra_string = ','.join(extra_headers)
139                    logging.warning(
140                        f"Extra headers found in tsv: {extra_string}")
141                    if not allow_extra_headers:
142                        match = False
143                if missing_headers:
144                    missing_string = ','.join(missing_headers)
145                    logging.error(
146                        f"Missing expected headers: {missing_string}")
147                    match = False
148                if not match:
149                    raise ValueError(
150                        f"Expected headers not in {self.file_path}")
151            return [
152                {
153                    k: v
154                    for k, v in row.items()
155                }
156                for row in dict_reader
157            ]

Create a list of dictionaries from a TSV file.

Args:

  • expected_headers (list[str], optional): The list of expected headers. If provided, will check that all headers are present in the TSV file. Defaults to None.
  • allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file. Only used if expected_headers is provided. Defaults to False.

Returns:

  • list[dict]: The list of dictionaries created from the TSV file.

Raises:

  • ValueError: If the expected headers are not found in the TSV file.