ops_utils.csv_util
Module for CSV file operations.
"""Module for CSV file operations."""
import csv
import logging
from typing import Optional, Sequence


class Csv:
    """Read and write delimited text files (tab-delimited by default)."""

    def __init__(self, file_path: str, delimiter: str = "\t", verbose: bool = True) -> None:
        r"""
        Initialize the Csv class.

        **Args:**
        - file_path (str): The path to the tabular file.
        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
        - verbose (bool, optional): Whether to log the creation of the file. Defaults to `True`.
        """
        self.file_path = file_path
        """@private"""
        self.delimiter = delimiter
        """@private"""
        self.verbose = verbose
        """@private"""

    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
        """
        Create a delimited file from a list of dictionaries.

        The file is written with the delimiter configured on this instance
        (tab by default) and `'` as the quote character. Keys that are not in
        the header list are ignored; headers missing from a row are written
        as empty fields.

        **Args:**
        - list_of_dicts (list[dict]): The list of dictionaries to write to the file.
        - header_list (list[str], optional): The list of headers to use in the file.
            If provided, output columns will be in same order as list. If omitted (or empty),
            the sorted union of all keys found in `list_of_dicts` is used. Defaults to None.

        **Returns:**
        - str: The path to the created file.
        """
        if not header_list:
            # Union of every key seen in any row; sorted so column order is deterministic.
            header_list = sorted({key for row in list_of_dicts for key in row})
        if self.verbose:
            logging.info(f'Creating {self.file_path}')
        # newline='' lets the csv module control line endings itself.
        with open(self.file_path, 'w', newline='') as f:
            writer = csv.DictWriter(
                f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
            writer.writeheader()
            writer.writerows(list_of_dicts)
        return self.file_path

    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
        """
        Create a delimited file from a list of lists.

        Every entry is converted with `str()` and the entries of each inner list
        are joined with the instance delimiter, one line per inner list.
        No quoting or escaping is applied.

        **Args:**
        - list_of_lists (list[list]): The list of lists to write to the file.

        **Returns:**
        - str: The path to the created file.
        """
        if self.verbose:
            logging.info(f'Creating {self.file_path}')
        with open(self.file_path, 'w') as f:
            for row in list_of_lists:
                f.write(self.delimiter.join(map(str, row)) + '\n')
        return self.file_path

    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
        """
        Create a list of dictionaries from a delimited file with no header row.

        **Args:**
        - headers_list (list[str]): The list of headers to use for the columns, in file order.

        **Returns:**
        - list[dict]: One dictionary per row of the file, keyed by `headers_list`.
        """
        # newline='' so the csv module sees line endings untranslated
        # (required for quoted fields that contain embedded newlines).
        with open(self.file_path, 'r', newline='') as f:
            reader = csv.DictReader(
                f, delimiter=self.delimiter, fieldnames=headers_list)
            return list(reader)

    def get_header_order_from_tsv(self) -> Sequence[str]:
        """
        Get the header order from a delimited file.

        **Returns:**
        - list[str]: The headers from the first row of the file, in file order.
            An empty list is returned when the file is empty.
        """
        with open(self.file_path, 'r', newline='') as f:
            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
            # fieldnames is None when the file has no rows at all.
            return list(reader.fieldnames or [])

    def create_list_of_dicts_from_tsv(
            self,
            expected_headers: Optional[list[str]] = None,
            allow_extra_headers: bool = False
    ) -> list[dict]:
        """
        Create a list of dictionaries from a delimited file with a header row.

        **Args:**
        - expected_headers (list[str], optional): The list of expected headers. If provided,
            will check that all headers are present in the file. Defaults to None.
        - allow_extra_headers (bool, optional): Whether to allow extra headers in the file.
            Only used if `expected_headers` is provided. Defaults to False.

        **Returns:**
        - list[dict]: One dictionary per data row, keyed by the file's headers.

        **Raises:**
        - ValueError: If expected headers are missing from the file (including when the
            file is empty), or if extra headers are present and `allow_extra_headers` is False.
        """
        with open(self.file_path, newline='') as f:
            dict_reader = csv.DictReader(
                f, delimiter=self.delimiter, skipinitialspace=True)
            if expected_headers:
                # fieldnames is None for an empty file; treat that as "no headers"
                # so the documented ValueError is raised instead of a TypeError.
                tsv_headers = dict_reader.fieldnames or []
                extra_headers = set(tsv_headers) - set(expected_headers)
                missing_headers = set(expected_headers) - set(tsv_headers)
                match = True
                if extra_headers:
                    # Sorted so the log message is stable between runs.
                    extra_string = ','.join(sorted(extra_headers))
                    logging.warning(
                        f"Extra headers found in tsv: {extra_string}")
                    if not allow_extra_headers:
                        match = False
                if missing_headers:
                    missing_string = ','.join(sorted(missing_headers))
                    logging.error(
                        f"Missing expected headers: {missing_string}")
                    match = False
                if not match:
                    raise ValueError(
                        f"Expected headers not in {self.file_path}")
            # Rows must be consumed while the file is still open.
            return [dict(row) for row in dict_reader]
class
Csv:
class Csv:
    """Read and write delimited text files (tab-delimited by default)."""

    def __init__(self, file_path: str, delimiter: str = "\t", verbose: bool = True) -> None:
        r"""
        Initialize the Csv class.

        **Args:**
        - file_path (str): The path to the tabular file.
        - delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
        - verbose (bool, optional): Whether to log the creation of the file. Defaults to `True`.
        """
        self.file_path = file_path
        """@private"""
        self.delimiter = delimiter
        """@private"""
        self.verbose = verbose
        """@private"""

    def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
        """
        Create a delimited file from a list of dictionaries.

        The file is written with the delimiter configured on this instance
        (tab by default) and `'` as the quote character. Keys that are not in
        the header list are ignored; headers missing from a row are written
        as empty fields.

        **Args:**
        - list_of_dicts (list[dict]): The list of dictionaries to write to the file.
        - header_list (list[str], optional): The list of headers to use in the file.
            If provided, output columns will be in same order as list. If omitted (or empty),
            the sorted union of all keys found in `list_of_dicts` is used. Defaults to None.

        **Returns:**
        - str: The path to the created file.
        """
        if not header_list:
            # Union of every key seen in any row; sorted so column order is deterministic.
            header_list = sorted({key for row in list_of_dicts for key in row})
        if self.verbose:
            logging.info(f'Creating {self.file_path}')
        # newline='' lets the csv module control line endings itself.
        with open(self.file_path, 'w', newline='') as f:
            writer = csv.DictWriter(
                f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
            writer.writeheader()
            writer.writerows(list_of_dicts)
        return self.file_path

    def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
        """
        Create a delimited file from a list of lists.

        Every entry is converted with `str()` and the entries of each inner list
        are joined with the instance delimiter, one line per inner list.
        No quoting or escaping is applied.

        **Args:**
        - list_of_lists (list[list]): The list of lists to write to the file.

        **Returns:**
        - str: The path to the created file.
        """
        if self.verbose:
            logging.info(f'Creating {self.file_path}')
        with open(self.file_path, 'w') as f:
            for row in list_of_lists:
                f.write(self.delimiter.join(map(str, row)) + '\n')
        return self.file_path

    def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
        """
        Create a list of dictionaries from a delimited file with no header row.

        **Args:**
        - headers_list (list[str]): The list of headers to use for the columns, in file order.

        **Returns:**
        - list[dict]: One dictionary per row of the file, keyed by `headers_list`.
        """
        # newline='' so the csv module sees line endings untranslated
        # (required for quoted fields that contain embedded newlines).
        with open(self.file_path, 'r', newline='') as f:
            reader = csv.DictReader(
                f, delimiter=self.delimiter, fieldnames=headers_list)
            return list(reader)

    def get_header_order_from_tsv(self) -> Sequence[str]:
        """
        Get the header order from a delimited file.

        **Returns:**
        - list[str]: The headers from the first row of the file, in file order.
            An empty list is returned when the file is empty.
        """
        with open(self.file_path, 'r', newline='') as f:
            reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
            # fieldnames is None when the file has no rows at all.
            return list(reader.fieldnames or [])

    def create_list_of_dicts_from_tsv(
            self,
            expected_headers: Optional[list[str]] = None,
            allow_extra_headers: bool = False
    ) -> list[dict]:
        """
        Create a list of dictionaries from a delimited file with a header row.

        **Args:**
        - expected_headers (list[str], optional): The list of expected headers. If provided,
            will check that all headers are present in the file. Defaults to None.
        - allow_extra_headers (bool, optional): Whether to allow extra headers in the file.
            Only used if `expected_headers` is provided. Defaults to False.

        **Returns:**
        - list[dict]: One dictionary per data row, keyed by the file's headers.

        **Raises:**
        - ValueError: If expected headers are missing from the file (including when the
            file is empty), or if extra headers are present and `allow_extra_headers` is False.
        """
        with open(self.file_path, newline='') as f:
            dict_reader = csv.DictReader(
                f, delimiter=self.delimiter, skipinitialspace=True)
            if expected_headers:
                # fieldnames is None for an empty file; treat that as "no headers"
                # so the documented ValueError is raised instead of a TypeError.
                tsv_headers = dict_reader.fieldnames or []
                extra_headers = set(tsv_headers) - set(expected_headers)
                missing_headers = set(expected_headers) - set(tsv_headers)
                match = True
                if extra_headers:
                    # Sorted so the log message is stable between runs.
                    extra_string = ','.join(sorted(extra_headers))
                    logging.warning(
                        f"Extra headers found in tsv: {extra_string}")
                    if not allow_extra_headers:
                        match = False
                if missing_headers:
                    missing_string = ','.join(sorted(missing_headers))
                    logging.error(
                        f"Missing expected headers: {missing_string}")
                    match = False
                if not match:
                    raise ValueError(
                        f"Expected headers not in {self.file_path}")
            # Rows must be consumed while the file is still open.
            return [dict(row) for row in dict_reader]
Class for CSV file operations.
Csv(file_path: str, delimiter: str = '\t', verbose: bool = True)
def __init__(self, file_path: str, delimiter: str = "\t", verbose: bool = True) -> None:
    r"""
    Record the settings shared by every read/write helper on this object.

    **Args:**
    - file_path (str): Location of the tabular file to read from or write to.
    - delimiter (str, optional): Field separator for the tabular file. Defaults to `\t` (tab-delimited).
    - verbose (bool, optional): When `True`, file creation is logged. Defaults to `True`.
    """
    # The bare "@private" strings below are attribute docstrings that keep
    # these attributes out of the generated API documentation.
    self.file_path = file_path
    """@private"""
    self.delimiter = delimiter
    """@private"""
    self.verbose = verbose
    """@private"""
Initialize the Csv class.
Args:
- file_path (str): The path to the tabular file.
- delimiter (str, optional): The delimiter to use in the tabular file. Defaults to `\t` (tab-delimited).
- verbose (bool, optional): Whether to log the creation of the file. Defaults to `True`.
def
create_tsv_from_list_of_dicts( self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
def create_tsv_from_list_of_dicts(self, list_of_dicts: list[dict], header_list: Optional[list[str]] = None) -> str:
    """
    Create a delimited file from a list of dictionaries.

    The file is written with the delimiter configured on this instance
    (tab by default) and `'` as the quote character. Keys that are not in
    the header list are ignored; headers missing from a row are written
    as empty fields.

    **Args:**
    - list_of_dicts (list[dict]): The list of dictionaries to write to the file.
    - header_list (list[str], optional): The list of headers to use in the file.
        If provided, output columns will be in same order as list. If omitted (or empty),
        the sorted union of all keys found in `list_of_dicts` is used. Defaults to None.

    **Returns:**
    - str: The path to the created file.
    """
    if not header_list:
        # Union of every key seen in any row; sorted so column order is deterministic.
        header_list = sorted({key for row in list_of_dicts for key in row})
    if self.verbose:
        logging.info(f'Creating {self.file_path}')
    # newline='' lets the csv module control line endings itself.
    with open(self.file_path, 'w', newline='') as f:
        # Use the instance delimiter so files written here can be read back
        # by the reader methods of the same object.
        writer = csv.DictWriter(
            f, fieldnames=header_list, delimiter=self.delimiter, quotechar="'", extrasaction='ignore')
        writer.writeheader()
        writer.writerows(list_of_dicts)
    return self.file_path
Create a TSV file from a list of dictionaries.
Args:
- list_of_dicts (list[dict]): The list of dictionaries to write to the TSV file.
- header_list (list[str], optional): The list of headers to use in the TSV file. If provided, output columns will be in same order as list. Defaults to None.
Returns:
- str: The path to the created TSV file.
def
create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
def create_tsv_from_list_of_lists(self, list_of_lists: list[list]) -> str:
    """
    Write rows given as lists to the file, one line per inner list.

    Each entry is converted with `str()`, the entries of a row are joined
    with the instance delimiter, and a newline is appended. No quoting or
    escaping is applied to the values.

    **Args:**
    - list_of_lists (list[list]): The rows to write; each inner list is one line.

    **Returns:**
    - str: The path to the created file.
    """
    if self.verbose:
        logging.info(f'Creating {self.file_path}')
    with open(self.file_path, 'w') as out_file:
        for row in list_of_lists:
            # str() every entry so non-string values can be joined
            line = self.delimiter.join(map(str, row))
            out_file.write(line + '\n')
    return self.file_path
Create a TSV file from a list of lists.
Args:
- list_of_lists (list[list]): The list of lists to write to the TSV file.
Returns:
- str: The path to the created TSV file.
def
create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
def create_list_of_dicts_from_tsv_with_no_headers(self, headers_list: list[str]) -> list[dict]:
    """
    Create a list of dictionaries from a delimited file with no header row.

    Every row of the file, including the first, is treated as data and keyed
    by `headers_list`.

    **Args:**
    - headers_list (list[str]): The list of headers to use for the columns, in file order.

    **Returns:**
    - list[dict]: One dictionary per row of the file, keyed by `headers_list`.
    """
    # newline='' hands line endings to the csv module untranslated, which is
    # required for quoted fields that contain embedded newlines.
    with open(self.file_path, 'r', newline='') as f:
        reader = csv.DictReader(
            f, delimiter=self.delimiter, fieldnames=headers_list)
        # Materialize the rows while the file is still open.
        return list(reader)
Create a list of dictionaries from a TSV file with no headers.
Args:
- headers_list (list[str]): The list of headers to use for the TSV file.
Returns:
- list[dict]: The list of dictionaries created from the TSV file.
def
get_header_order_from_tsv(self) -> Sequence[str]:
def get_header_order_from_tsv(self) -> Sequence[str]:
    """
    Get the header order from a delimited file.

    **Returns:**
    - list[str]: The headers from the first row of the file, in file order.
        An empty list is returned when the file is empty.
    """
    with open(self.file_path, 'r', newline='') as f:
        reader = csv.DictReader(f, delimiter=self.delimiter, skipinitialspace=True)
        # fieldnames is None when the file has no rows at all; return an
        # empty list so the result always matches the annotated type.
        return list(reader.fieldnames or [])
Get the header order from a TSV file.
Returns:
- list[str]: The list of headers in the TSV file.
def
create_list_of_dicts_from_tsv( self, expected_headers: Optional[list[str]] = None, allow_extra_headers: bool = False) -> list[dict]:
def create_list_of_dicts_from_tsv(
        self,
        expected_headers: Optional[list[str]] = None,
        allow_extra_headers: bool = False
) -> list[dict]:
    """
    Create a list of dictionaries from a delimited file with a header row.

    **Args:**
    - expected_headers (list[str], optional): The list of expected headers. If provided,
        will check that all headers are present in the file. Defaults to None.
    - allow_extra_headers (bool, optional): Whether to allow extra headers in the file.
        Only used if `expected_headers` is provided. Defaults to False.

    **Returns:**
    - list[dict]: One dictionary per data row, keyed by the file's headers.

    **Raises:**
    - ValueError: If expected headers are missing from the file (including when the
        file is empty), or if extra headers are present and `allow_extra_headers` is False.
    """
    with open(self.file_path, newline='') as f:
        dict_reader = csv.DictReader(
            f, delimiter=self.delimiter, skipinitialspace=True)
        if expected_headers:
            # fieldnames is None for an empty file; treat that as "no headers"
            # so the documented ValueError is raised instead of a TypeError.
            tsv_headers = dict_reader.fieldnames or []
            extra_headers = set(tsv_headers) - set(expected_headers)
            missing_headers = set(expected_headers) - set(tsv_headers)
            match = True
            if extra_headers:
                # Sorted so the log message is stable between runs.
                extra_string = ','.join(sorted(extra_headers))
                logging.warning(
                    f"Extra headers found in tsv: {extra_string}")
                if not allow_extra_headers:
                    match = False
            if missing_headers:
                missing_string = ','.join(sorted(missing_headers))
                logging.error(
                    f"Missing expected headers: {missing_string}")
                match = False
            if not match:
                raise ValueError(
                    f"Expected headers not in {self.file_path}")
        # Rows must be consumed while the file is still open.
        return [dict(row) for row in dict_reader]
Create a list of dictionaries from a TSV file.
Args:
- expected_headers (list[str], optional): The list of expected headers. If provided, will check that all headers are present in the TSV file. Defaults to None.
- allow_extra_headers (bool, optional): Whether to allow extra headers in the TSV file.
Only used if `expected_headers` is provided. Defaults to False.
Returns:
- list[dict]: The list of dictionaries created from the TSV file.
Raises:
- ValueError: If the expected headers are not found in the TSV file.