ops_utils.csv_util

Module for CSV file operations.

  1"""Module for CSV file operations."""
  2import csv
  3import logging
  4from typing import Optional, Sequence
  5
  6
  class Csv:
      """Class for CSV file operations.

      Wraps one delimited text file (tab-separated by default) and offers helpers
      to write it from in-memory rows and to read it back as dictionaries.
      """

      def __init__(self, file_path: str, delimiter: str = "\t"):
          r"""
          Initialize the Csv class.

          **Args:**
          - file_path (str): The path to the tabular file.
          - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
          """
          self.file_path = file_path
          """@private"""
          self.delimiter = delimiter
          """@private"""

      def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
          """
          Create a delimited file from a list of dictionaries.

          The file is written with the instance's delimiter (tab by default), so it
          can be read back by the reading methods of the same instance. Keys that are
          not in the header are ignored.

          **Args:**
          - list_of_dicts (list[dict]): The list of dictionaries to write to the file.
          - header_list (list[str], optional): The list of headers to use in the file.
                  If provided, output columns will be in same order as list. If omitted
                  (or empty), the sorted union of all dictionary keys is used. Defaults to None.

          **Returns:**
          - str: The path to the created file.
          """
          if not header_list:
              # Union of every key seen in any row; sorted so column order is deterministic.
              header_list = sorted({key for row in list_of_dicts for key in row})
          logging.info(f'Creating {self.file_path}')
          # newline='' lets the csv module control line endings itself.
          with open(self.file_path, 'w', newline='') as f:
              writer = csv.DictWriter(
                  f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
              writer.writeheader()
              writer.writerows(list_of_dicts)
          return self.file_path

      def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
          """
          Create a delimited file from a list of lists.

          Each inner list becomes one line; entries are converted with `str()` and
          joined with the instance's delimiter. No quoting or escaping is applied.

          **Args:**
          - list_of_lists (list[list]): The list of lists to write to the file.

          **Returns:**
          - str: The path to the created file.
          """
          logging.info(f'Creating {self.file_path}')
          with open(self.file_path, 'w') as f:
              f.writelines(
                  self.delimiter.join(map(str, row)) + '\n'
                  for row in list_of_lists
              )
          return self.file_path

      def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
          """
          Create a list of dictionaries from a delimited file with no header row.

          **Args:**
          - headers_list (list[str]): The list of headers to use as keys, in column order.

          **Returns:**
          - list[dict]: One dictionary per line of the file.
          """
          with open(self.file_path, 'r') as f:
              reader = csv.DictReader(
                  f, delimiter=self.delimiter, fieldnames=headers_list)
              return list(reader)

      def get_header_order_from_tsv(self) -> Sequence[str]:
          """
          Get the header order from a delimited file.

          **Returns:**
          - list[str]: The headers in file order; an empty list if the file is empty.
          """
          with open(self.file_path, 'r') as f:
              reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
              # fieldnames is None when the file has no first row.
              return reader.fieldnames or []

      def create_list_of_dicts_from_tsv(
              self,
              expected_headers: Optional[list[str]] = None,
              allow_extra_headers: bool = False
      ) -> list[dict]:
          """
          Create a list of dictionaries from a delimited file with a header row.

          **Args:**
          - expected_headers (list[str], optional): The list of expected headers. If provided,
                  will check that all headers are present in the file. Defaults to None.
          - allow_extra_headers (bool, optional): Whether to allow extra headers in the file.
                  Only used if `expected_headers` is provided. Defaults to False.

          **Returns:**
          - list[dict]: The list of dictionaries created from the file.

          **Raises:**
          - ValueError: If expected headers are missing from the file, or if the file has
                  extra headers and `allow_extra_headers` is False.
          """
          with open(self.file_path) as f:
              dict_reader = csv.DictReader(
                  f, delimiter=self.delimiter, skipinitialspace=True)
              if expected_headers:
                  # fieldnames is None for an empty file; treat that as "no headers" so the
                  # check reports them missing (ValueError) instead of failing on set(None).
                  tsv_headers = set(dict_reader.fieldnames or [])
                  wanted_headers = set(expected_headers)
                  extra_headers = tsv_headers - wanted_headers
                  missing_headers = wanted_headers - tsv_headers
                  if extra_headers:
                      extra_string = ','.join(sorted(extra_headers))
                      logging.warning(
                          f"Extra headers found in tsv: {extra_string}")
                  if missing_headers:
                      missing_string = ','.join(sorted(missing_headers))
                      logging.error(
                          f"Missing expected headers: {missing_string}")
                  if missing_headers or (extra_headers and not allow_extra_headers):
                      raise ValueError(
                          f"Expected headers not in {self.file_path}")
              return [dict(row) for row in dict_reader]
class Csv:
    """Class for CSV file operations.

    Wraps one delimited text file (tab-separated by default) and offers helpers
    to write it from in-memory rows and to read it back as dictionaries.
    """

    def __init__(self, file_path: str, delimiter: str = "\t"):
        r"""
        Initialize the Csv class.

        **Args:**
        - file_path (str): The path to the tabular file.
        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
        """
        self.file_path = file_path
        """@private"""
        self.delimiter = delimiter
        """@private"""

    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
        """
        Create a delimited file from a list of dictionaries.

        The file is written with the instance's delimiter (tab by default), so it
        can be read back by the reading methods of the same instance. Keys that are
        not in the header are ignored.

        **Args:**
        - list_of_dicts (list[dict]): The list of dictionaries to write to the file.
        - header_list (list[str], optional): The list of headers to use in the file.
                If provided, output columns will be in same order as list. If omitted
                (or empty), the sorted union of all dictionary keys is used. Defaults to None.

        **Returns:**
        - str: The path to the created file.
        """
        if not header_list:
            # Union of every key seen in any row; sorted so column order is deterministic.
            header_list = sorted({key for row in list_of_dicts for key in row})
        logging.info(f'Creating {self.file_path}')
        # newline='' lets the csv module control line endings itself.
        with open(self.file_path, 'w', newline='') as f:
            writer = csv.DictWriter(
                f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
            writer.writeheader()
            writer.writerows(list_of_dicts)
        return self.file_path

    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
        """
        Create a delimited file from a list of lists.

        Each inner list becomes one line; entries are converted with `str()` and
        joined with the instance's delimiter. No quoting or escaping is applied.

        **Args:**
        - list_of_lists (list[list]): The list of lists to write to the file.

        **Returns:**
        - str: The path to the created file.
        """
        logging.info(f'Creating {self.file_path}')
        with open(self.file_path, 'w') as f:
            f.writelines(
                self.delimiter.join(map(str, row)) + '\n'
                for row in list_of_lists
            )
        return self.file_path

    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
        """
        Create a list of dictionaries from a delimited file with no header row.

        **Args:**
        - headers_list (list[str]): The list of headers to use as keys, in column order.

        **Returns:**
        - list[dict]: One dictionary per line of the file.
        """
        with open(self.file_path, 'r') as f:
            reader = csv.DictReader(
                f, delimiter=self.delimiter, fieldnames=headers_list)
            return list(reader)

    def get_header_order_from_tsv(self) -> Sequence[str]:
        """
        Get the header order from a delimited file.

        **Returns:**
        - list[str]: The headers in file order; an empty list if the file is empty.
        """
        with open(self.file_path, 'r') as f:
            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
            # fieldnames is None when the file has no first row.
            return reader.fieldnames or []

    def create_list_of_dicts_from_tsv(
            self,
            expected_headers: Optional[list[str]] = None,
            allow_extra_headers: bool = False
    ) -> list[dict]:
        """
        Create a list of dictionaries from a delimited file with a header row.

        **Args:**
        - expected_headers (list[str], optional): The list of expected headers. If provided,
                will check that all headers are present in the file. Defaults to None.
        - allow_extra_headers (bool, optional): Whether to allow extra headers in the file.
                Only used if `expected_headers` is provided. Defaults to False.

        **Returns:**
        - list[dict]: The list of dictionaries created from the file.

        **Raises:**
        - ValueError: If expected headers are missing from the file, or if the file has
                extra headers and `allow_extra_headers` is False.
        """
        with open(self.file_path) as f:
            dict_reader = csv.DictReader(
                f, delimiter=self.delimiter, skipinitialspace=True)
            if expected_headers:
                # fieldnames is None for an empty file; treat that as "no headers" so the
                # check reports them missing (ValueError) instead of failing on set(None).
                tsv_headers = set(dict_reader.fieldnames or [])
                wanted_headers = set(expected_headers)
                extra_headers = tsv_headers - wanted_headers
                missing_headers = wanted_headers - tsv_headers
                if extra_headers:
                    extra_string = ','.join(sorted(extra_headers))
                    logging.warning(
                        f"Extra headers found in tsv: {extra_string}")
                if missing_headers:
                    missing_string = ','.join(sorted(missing_headers))
                    logging.error(
                        f"Missing expected headers: {missing_string}")
                if missing_headers or (extra_headers and not allow_extra_headers):
                    raise ValueError(
                        f"Expected headers not in {self.file_path}")
            return [dict(row) for row in dict_reader]

Class for CSV file operations.

Csv(file_path: str, delimiter: str = '\t')
11    def __init__(self, file_path: str, delimiter: str = "\t"):
12        r"""
13        Initialize the Csv class.
14
15        **Args:**
16        - file_path (str): The path to the tabular file.
17        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\\t` (tab-delimited).
18        """
19        self.file_path = file_path
20        """@private"""
21        self.delimiter = delimiter
22        """@private"""

Initialize the Csv class.

Args:

  • file_path (str): The path to the tabular file.
  • delimiter (str, optional): The delimiter to use in the tabular file. Defaults to \t (tab-delimited).
def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
    """
    Create a delimited file from a list of dictionaries.

    The file is written with the instance's delimiter (tab by default), so it
    can be read back by the reading methods of the same instance. Keys that are
    not in the header are ignored.

    **Args:**
    - list_of_dicts (list[dict]): The list of dictionaries to write to the file.
    - header_list (list[str], optional): The list of headers to use in the file.
            If provided, output columns will be in same order as list. If omitted
            (or empty), the sorted union of all dictionary keys is used. Defaults to None.

    **Returns:**
    - str: The path to the created file.
    """
    if not header_list:
        # Union of every key seen in any row; sorted so column order is deterministic.
        header_list = sorted({key for row in list_of_dicts for key in row})
    logging.info(f'Creating {self.file_path}')
    # newline='' lets the csv module control line endings itself.
    with open(self.file_path, 'w', newline='') as f:
        writer = csv.DictWriter(
            f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
        writer.writeheader()
        writer.writerows(list_of_dicts)
    return self.file_path

Create a TSV file from a list of dictionaries.

Args:

  • list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
  • header_list (list[str], optional): The list of headers to use in the TSV file. If provided, output columns will be in same order as list. Defaults to None.

Returns:

  • str: The path to the created TSV file.
def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
    """
    Write a list of lists to the instance's file, one inner list per line.

    Entries are converted with `str()` and joined with the instance's delimiter;
    no quoting or escaping is applied.

    **Args:**
    - list_of_lists (list[list]): The rows to write to the TSV file.

    **Returns:**
    - str: The path to the created TSV file.
    """
    logging.info(f'Creating {self.file_path}')
    with open(self.file_path, 'w') as out_file:
        for row in list_of_lists:
            # join() only accepts strings, so stringify every cell first
            line = self.delimiter.join(map(str, row))
            out_file.write(line + '\n')
    return self.file_path

Create a TSV file from a list of lists.

Args:

  • list_of_lists (list[list]): The list of lists to write to the TSV file.

Returns:

  • str: The path to the created TSV file.
def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
    """
    Read a header-less TSV file into a list of dictionaries.

    **Args:**
    - headers_list (list[str]): The names to use as dictionary keys, in column order.

    **Returns:**
    - list[dict]: One dictionary per line of the TSV file.
    """
    with open(self.file_path, 'r') as source:
        # Supplying fieldnames makes DictReader treat the first line as data.
        return list(
            csv.DictReader(source, fieldnames=headers_list, delimiter=self.delimiter)
        )

Create a list of dictionaries from a TSV file with no headers.

Args:

  • headers_list (list[str]): The list of headers to use for the TSV file.

Returns:

  • list[dict]: The list of dictionaries created from the TSV file.
def get_header_order_from_tsv(self) -> Sequence[str]:
    """
    Get the header order from a TSV file.

    Whitespace immediately following a delimiter is skipped when parsing the
    header row.

    **Returns:**
    - list[str]: The headers in file order; an empty list if the file is empty.
    """
    with open(self.file_path, 'r') as f:
        reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
        # fieldnames is None when the file has no first row; return an empty
        # list so the result always matches the declared return type.
        return reader.fieldnames or []

Get the header order from a TSV file.

Returns:

  • list[str]: The list of headers in the TSV file.
def create_list_of_dicts_from_tsv(
        self,
        expected_headers: Optional[list[str]] = None,
        allow_extra_headers: bool = False
) -> list[dict]:
    """
    Create a list of dictionaries from a TSV file with a header row.

    **Args:**
    - expected_headers (list[str], optional): The list of expected headers. If provided,
            will check that all headers are present in the TSV file. Defaults to None.
    - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
            Only used if `expected_headers` is provided. Defaults to False.

    **Returns:**
    - list[dict]: The list of dictionaries created from the TSV file.

    **Raises:**
    - ValueError: If expected headers are missing from the TSV file, or if the file has
            extra headers and `allow_extra_headers` is False.
    """
    with open(self.file_path) as f:
        dict_reader = csv.DictReader(
            f, delimiter=self.delimiter, skipinitialspace=True)
        if expected_headers:
            # fieldnames is None for an empty file; treat that as "no headers" so the
            # check reports them missing (ValueError) instead of failing on set(None).
            tsv_headers = set(dict_reader.fieldnames or [])
            wanted_headers = set(expected_headers)
            extra_headers = tsv_headers - wanted_headers
            missing_headers = wanted_headers - tsv_headers
            if extra_headers:
                # sorted() keeps the log message stable between runs
                extra_string = ','.join(sorted(extra_headers))
                logging.warning(
                    f"Extra headers found in tsv: {extra_string}")
            if missing_headers:
                missing_string = ','.join(sorted(missing_headers))
                logging.error(
                    f"Missing expected headers: {missing_string}")
            if missing_headers or (extra_headers and not allow_extra_headers):
                raise ValueError(
                    f"Expected headers not in {self.file_path}")
        return [dict(row) for row in dict_reader]

Create a list of dictionaries from a TSV file.

Args:

  • expected_headers (list[str], optional): The list of expected headers. If provided, will check that all headers are present in the TSV file. Defaults to None.
  • allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file. Only used if expected_headers is provided. Defaults to False.

Returns:

  • list[dict]: The list of dictionaries created from the TSV file.

Raises:

  • ValueError: If the expected headers are not found in the TSV file.