ops_utils.csv_util
Module for CSV file operations.
"""Module for CSV file operations."""
import csv
import logging
from typing import Optional, Sequence


class Csv:
    """Class for reading and writing delimited (TSV by default) files at a fixed path."""

    def __init__(self, file_path: str, delimiter: str = "\t"):
        r"""
        Initialize the Csv class.

        **Args:**
        - file_path (str): The path to the tabular file.
        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
        """
        self.file_path = file_path
        """@private"""
        self.delimiter = delimiter
        """@private"""

    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
        """
        Create a TSV file from a list of dictionaries.

        Keys missing from a row are written as empty cells; keys not in the header
        list are ignored. The file is written with the delimiter given at construction.

        **Args:**
        - list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
        - header_list (list[str], optional): The list of headers to use in the TSV file.
                If provided, output columns will be in same order as list. Defaults to None,
                in which case the sorted union of all dictionary keys is used.

        **Returns:**
        - str: The path to the created TSV file.
        """
        if not header_list:
            # Union of every key seen in any row, sorted so column order is deterministic
            header_list = sorted({key for row in list_of_dicts for key in row})
        logging.info(f'Creating {self.file_path}')
        # newline='' lets the csv module control line endings itself
        with open(self.file_path, 'w', newline='') as f:
            writer = csv.DictWriter(
                f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
            writer.writeheader()
            writer.writerows(list_of_dicts)
        return self.file_path

    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
        """
        Create a TSV file from a list of lists.

        Each inner list becomes one line; entries are converted with `str()` and joined
        with the delimiter. No quoting or escaping is applied.

        **Args:**
        - list_of_lists (list[list]): The list of lists to write to the TSV file.

        **Returns:**
        - str: The path to the created TSV file.
        """
        logging.info(f'Creating {self.file_path}')
        with open(self.file_path, 'w') as f:
            f.writelines(
                self.delimiter.join(map(str, row)) + '\n'
                for row in list_of_lists
            )
        return self.file_path

    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
        """
        Create a list of dictionaries from a TSV file with no headers.

        **Args:**
        - headers_list (list[str]): The list of headers to use for the TSV file.

        **Returns:**
        - list[dict]: The list of dictionaries created from the TSV file.
        """
        with open(self.file_path, 'r') as f:
            reader = csv.DictReader(
                f, delimiter=self.delimiter, fieldnames=headers_list)
            return list(reader)

    def get_header_order_from_tsv(self) -> Sequence[str]:
        """
        Get the header order from a TSV file.

        **Returns:**
        - list[str]: The list of headers in the TSV file (empty if the file is empty).
        """
        with open(self.file_path, 'r') as f:
            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
            # fieldnames is None when the file has no header row at all
            return list(reader.fieldnames or [])

    def create_list_of_dicts_from_tsv(
            self,
            expected_headers: Optional[list[str]] = None,
            allow_extra_headers: bool = False
    ) -> list[dict]:
        """
        Create a list of dictionaries from a TSV file.

        **Args:**
        - expected_headers (list[str], optional): The list of expected headers. If provided,
                will check that all headers are present in the TSV file. Defaults to None.
        - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
                Only used if `expected_headers` is provided. Defaults to False.

        **Returns:**
        - list[dict]: The list of dictionaries created from the TSV file.

        **Raises:**
        - ValueError: If the expected headers are not found in the TSV file, or if extra
                headers are found and `allow_extra_headers` is False.
        """
        with open(self.file_path) as f:
            dict_reader = csv.DictReader(
                f, delimiter=self.delimiter, skipinitialspace=True)
            if expected_headers:
                # fieldnames is None for an empty file; treat that as "no headers" so the
                # documented ValueError is raised instead of a TypeError from set(None)
                tsv_headers = dict_reader.fieldnames or []
                extra_headers = set(tsv_headers) - set(expected_headers)
                missing_headers = set(expected_headers) - set(tsv_headers)
                match = True
                if extra_headers:
                    # Sorted so the log message is stable between runs
                    extra_string = ','.join(sorted(extra_headers))
                    logging.warning(
                        f"Extra headers found in tsv: {extra_string}")
                    if not allow_extra_headers:
                        match = False
                if missing_headers:
                    missing_string = ','.join(sorted(missing_headers))
                    logging.error(
                        f"Missing expected headers: {missing_string}")
                    match = False
                if not match:
                    raise ValueError(
                        f"Expected headers not in {self.file_path}")
            return [dict(row) for row in dict_reader]
class
Csv:
class Csv:
    """Class for reading and writing delimited (TSV by default) files at a fixed path."""

    def __init__(self, file_path: str, delimiter: str = "\t"):
        r"""
        Initialize the Csv class.

        **Args:**
        - file_path (str): The path to the tabular file.
        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
        """
        self.file_path = file_path
        """@private"""
        self.delimiter = delimiter
        """@private"""

    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
        """
        Create a TSV file from a list of dictionaries.

        Keys missing from a row are written as empty cells; keys not in the header
        list are ignored. The file is written with the delimiter given at construction.

        **Args:**
        - list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
        - header_list (list[str], optional): The list of headers to use in the TSV file.
                If provided, output columns will be in same order as list. Defaults to None,
                in which case the sorted union of all dictionary keys is used.

        **Returns:**
        - str: The path to the created TSV file.
        """
        if not header_list:
            # Union of every key seen in any row, sorted so column order is deterministic
            header_list = sorted({key for row in list_of_dicts for key in row})
        logging.info(f'Creating {self.file_path}')
        # newline='' lets the csv module control line endings itself
        with open(self.file_path, 'w', newline='') as f:
            writer = csv.DictWriter(
                f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
            writer.writeheader()
            writer.writerows(list_of_dicts)
        return self.file_path

    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
        """
        Create a TSV file from a list of lists.

        Each inner list becomes one line; entries are converted with `str()` and joined
        with the delimiter. No quoting or escaping is applied.

        **Args:**
        - list_of_lists (list[list]): The list of lists to write to the TSV file.

        **Returns:**
        - str: The path to the created TSV file.
        """
        logging.info(f'Creating {self.file_path}')
        with open(self.file_path, 'w') as f:
            f.writelines(
                self.delimiter.join(map(str, row)) + '\n'
                for row in list_of_lists
            )
        return self.file_path

    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
        """
        Create a list of dictionaries from a TSV file with no headers.

        **Args:**
        - headers_list (list[str]): The list of headers to use for the TSV file.

        **Returns:**
        - list[dict]: The list of dictionaries created from the TSV file.
        """
        with open(self.file_path, 'r') as f:
            reader = csv.DictReader(
                f, delimiter=self.delimiter, fieldnames=headers_list)
            return list(reader)

    def get_header_order_from_tsv(self) -> Sequence[str]:
        """
        Get the header order from a TSV file.

        **Returns:**
        - list[str]: The list of headers in the TSV file (empty if the file is empty).
        """
        with open(self.file_path, 'r') as f:
            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
            # fieldnames is None when the file has no header row at all
            return list(reader.fieldnames or [])

    def create_list_of_dicts_from_tsv(
            self,
            expected_headers: Optional[list[str]] = None,
            allow_extra_headers: bool = False
    ) -> list[dict]:
        """
        Create a list of dictionaries from a TSV file.

        **Args:**
        - expected_headers (list[str], optional): The list of expected headers. If provided,
                will check that all headers are present in the TSV file. Defaults to None.
        - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
                Only used if `expected_headers` is provided. Defaults to False.

        **Returns:**
        - list[dict]: The list of dictionaries created from the TSV file.

        **Raises:**
        - ValueError: If the expected headers are not found in the TSV file, or if extra
                headers are found and `allow_extra_headers` is False.
        """
        with open(self.file_path) as f:
            dict_reader = csv.DictReader(
                f, delimiter=self.delimiter, skipinitialspace=True)
            if expected_headers:
                # fieldnames is None for an empty file; treat that as "no headers" so the
                # documented ValueError is raised instead of a TypeError from set(None)
                tsv_headers = dict_reader.fieldnames or []
                extra_headers = set(tsv_headers) - set(expected_headers)
                missing_headers = set(expected_headers) - set(tsv_headers)
                match = True
                if extra_headers:
                    # Sorted so the log message is stable between runs
                    extra_string = ','.join(sorted(extra_headers))
                    logging.warning(
                        f"Extra headers found in tsv: {extra_string}")
                    if not allow_extra_headers:
                        match = False
                if missing_headers:
                    missing_string = ','.join(sorted(missing_headers))
                    logging.error(
                        f"Missing expected headers: {missing_string}")
                    match = False
                if not match:
                    raise ValueError(
                        f"Expected headers not in {self.file_path}")
            return [dict(row) for row in dict_reader]
Class for CSV file operations.
Csv(file_path: str, delimiter: str = '\t')
def __init__(self, file_path: str, delimiter: str = "\t"):
    r"""
    Initialize the Csv class.

    **Args:**
    - file_path (str): The path to the tabular file.
    - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
    """
    # NOTE(review): the bare "@private" strings below look like documentation-generator
    # markers that hide these attributes from rendered docs - confirm before removing.
    self.file_path = file_path
    """@private"""
    self.delimiter = delimiter
    """@private"""
Initialize the Csv class.
Args:
- file_path (str): The path to the tabular file.
- delimiter (str, optional): The delimiter to use in the tabular file. Defaults to
\t
(tab-delimited).
def
create_tsv_from_list_of_dicts( self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
    """
    Create a TSV file from a list of dictionaries.

    Keys missing from a row are written as empty cells; keys not in the header
    list are ignored. The file is written with the delimiter given at construction.

    **Args:**
    - list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
    - header_list (list[str], optional): The list of headers to use in the TSV file.
            If provided, output columns will be in same order as list. Defaults to None,
            in which case the sorted union of all dictionary keys is used.

    **Returns:**
    - str: The path to the created TSV file.
    """
    if not header_list:
        # Union of every key seen in any row, sorted so column order is deterministic
        header_list = sorted({key for row in list_of_dicts for key in row})
    logging.info(f'Creating {self.file_path}')
    # newline='' lets the csv module control line endings itself
    with open(self.file_path, 'w', newline='') as f:
        # Use the instance delimiter so files written here can be read back by the
        # reader methods, which all parse with self.delimiter
        writer = csv.DictWriter(
            f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
        writer.writeheader()
        writer.writerows(list_of_dicts)
    return self.file_path
Create a TSV file from a list of dictionaries.
Args:
- list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
- header_list (list[str], optional): The list of headers to use in the TSV file. If provided, output columns will be in same order as list. Defaults to None.
Returns:
- str: The path to the created TSV file.
def
create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
    """
    Write rows of values to the file, one line per inner list.

    Every entry is converted with `str()` and the entries of a row are joined
    with the instance delimiter. No quoting or escaping is applied.

    **Args:**
    - list_of_lists (list[list]): The rows to write; each inner list is one line.

    **Returns:**
    - str: The path to the created TSV file.
    """
    logging.info(f'Creating {self.file_path}')
    with open(self.file_path, 'w') as f:
        f.writelines(
            self.delimiter.join(map(str, row)) + '\n'
            for row in list_of_lists
        )
    return self.file_path
Create a TSV file from a list of lists.
Args:
- list_of_lists (list[list]): The list of lists to write to the TSV file.
Returns:
- str: The path to the created TSV file.
def
create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
    """
    Read a header-less file into dictionaries keyed by the supplied column names.

    **Args:**
    - headers_list (list[str]): Column names to assign, in file column order.

    **Returns:**
    - list[dict]: One dictionary per line of the file.
    """
    with open(self.file_path, 'r') as f:
        rows = csv.DictReader(
            f, fieldnames=headers_list, delimiter=self.delimiter)
        return list(rows)
Create a list of dictionaries from a TSV file with no headers.
Args:
- headers_list (list[str]): The list of headers to use for the TSV file.
Returns:
- list[dict]: The list of dictionaries created from the TSV file.
def
get_header_order_from_tsv(self) -> Sequence[str]:
def get_header_order_from_tsv(self) -> Sequence[str]:
    """
    Get the header order from a TSV file.

    The first row of the file is parsed with the instance delimiter; whitespace
    immediately following a delimiter is skipped.

    **Returns:**
    - list[str]: The list of headers in the TSV file, in file order. An empty list
            is returned when the file is empty.
    """
    with open(self.file_path, 'r') as f:
        reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
        # fieldnames is None when the file has no header row at all; return an
        # empty list so the result always matches the declared return type
        return list(reader.fieldnames or [])
Get the header order from a TSV file.
Returns:
- list[str]: The list of headers in the TSV file.
def
create_list_of_dicts_from_tsv( self, expected_headers: Optional[list[str]] = None, allow_extra_headers: bool = False) -> list[dict]:
def create_list_of_dicts_from_tsv(
        self,
        expected_headers: Optional[list[str]] = None,
        allow_extra_headers: bool = False
) -> list[dict]:
    """
    Create a list of dictionaries from a TSV file.

    **Args:**
    - expected_headers (list[str], optional): The list of expected headers. If provided,
            will check that all headers are present in the TSV file. Defaults to None.
    - allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
            Only used if `expected_headers` is provided. Defaults to False.

    **Returns:**
    - list[dict]: The list of dictionaries created from the TSV file.

    **Raises:**
    - ValueError: If the expected headers are not found in the TSV file, or if extra
            headers are found and `allow_extra_headers` is False.
    """
    with open(self.file_path) as f:
        dict_reader = csv.DictReader(
            f, delimiter=self.delimiter, skipinitialspace=True)
        if expected_headers:
            # fieldnames is None for an empty file; treat that as "no headers" so the
            # documented ValueError is raised instead of a TypeError from set(None)
            tsv_headers = dict_reader.fieldnames or []
            extra_headers = set(tsv_headers) - set(expected_headers)
            missing_headers = set(expected_headers) - set(tsv_headers)
            match = True
            if extra_headers:
                # Sorted so the log message is stable between runs
                extra_string = ','.join(sorted(extra_headers))
                logging.warning(
                    f"Extra headers found in tsv: {extra_string}")
                if not allow_extra_headers:
                    match = False
            if missing_headers:
                missing_string = ','.join(sorted(missing_headers))
                logging.error(
                    f"Missing expected headers: {missing_string}")
                match = False
            if not match:
                raise ValueError(
                    f"Expected headers not in {self.file_path}")
        return [dict(row) for row in dict_reader]
Create a list of dictionaries from a TSV file.
Args:
- expected_headers (list[str], optional): The list of expected headers. If provided, will check that all headers are present in the TSV file. Defaults to None.
- allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
Only used if
expected_headers
is provided. Defaults to False.
Returns:
- list[dict]: The list of dictionaries created from the TSV file.
Raises:
- ValueError: If the expected headers are not found in the TSV file.