diff --git a/src/hdf5_writer.py b/src/hdf5_writer.py index ed3a8c573d840babf805a81e39965def711616c6..2dc1757708284faf315ce9cef94cb3008b895ca1 100644 --- a/src/hdf5_writer.py +++ b/src/hdf5_writer.py @@ -7,6 +7,7 @@ import pandas as pd import numpy as np import h5py import logging +import json import utils.g5505_utils as utils import instruments.filereader_registry as filereader_registry @@ -209,11 +210,22 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str, stdout = inst logging.error('Failed to create group %s into HDF5: %s', group_name, inst) + if 'data_lineage_metadata.json' in filtered_filenames_list: + idx = filtered_filenames_list.index('data_lineage_metadata.json') + data_lineage_file = filtered_filenames_list[idx] + try: + with open('/'.join([dirpath,data_lineage_file]),'r') as dlf: + data_lineage_dict = json.load(dlf) + filtered_filenames_list.pop(idx) + except json.JSONDecodeError: + data_lineage_dict = {} # Start fresh if file is invalid + + else: + data_lineage_dict = {} + + for filenumber, filename in enumerate(filtered_filenames_list): - #file_ext = os.path.splitext(filename)[1] - #try: - # hdf5 path to filename group dest_group_name = f'{group_name}/{filename}' @@ -221,6 +233,10 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str, #file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename)) #file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename)) file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename)) + # Check whether there is an available file reader + if file_dict is not None and isinstance(file_dict, dict): + if 'attributes_dict' in file_dict: + file_dict['attributes_dict'].update(data_lineage_dict.get(filename,{})) stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict) diff --git a/utils/g5505_utils.py b/utils/g5505_utils.py index a145df1175abce203a57c4c4e8743644f31a09e0..b41327197478393ccb43b34cf16f42e0905a4d86 100644 --- a/utils/g5505_utils.py +++ b/utils/g5505_utils.py @@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame): dtype.append((col, 'i4')) # Assuming 32-bit integer elif pd.api.types.is_float_dtype(col_dtype): dtype.append((col, 'f4')) # Assuming 32-bit float + elif pd.api.types.is_bool_dtype(col_dtype): + dtype.append((col,bool)) else: # Handle unsupported data types print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")