Merge branch 'main' of https://gitlab.psi.ch/5505-public/dima

2cdd6925 · florez_j · bc1d65d4 · df0aca97 · 2cdd6925 · 2cdd6925
Commit 2cdd6925 authored 1 month ago by florez_j
--- a/src/hdf5_writer.py
+++ b/src/hdf5_writer.py
@@ -7,6 +7,7 @@ import pandas as pd
 import numpy as np
 import h5py
 import logging
+import json
 import utils.g5505_utils as utils
 import instruments.filereader_registry as filereader_registry
@@ -209,11 +210,22 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
                    stdout = inst
                    logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
+                if 'data_lineage_metadata.json' in filtered_filenames_list:
+                    idx = filtered_filenames_list.index('data_lineage_metadata.json') 
+                    data_lineage_file = filtered_filenames_list[idx]
+                    try:
+                        with open('/'.join([dirpath,data_lineage_file]),'r') as dlf:                        
+                            data_lineage_dict = json.load(dlf)
+                        filtered_filenames_list.pop(idx)
+                    except json.JSONDecodeError:
+                            data_lineage_dict = {}  # Start fresh if file is invalid
+                else:
+                    data_lineage_dict = {}                
                for filenumber, filename in enumerate(filtered_filenames_list):
-                    #file_ext = os.path.splitext(filename)[1]
-                    #try: 
                    # hdf5 path to filename group 
                    dest_group_name = f'{group_name}/{filename}'
@@ -221,6 +233,10 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
                        #file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
                        #file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
                        file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
+                        # If the reader returned a dict with attributes, merge this file's data-lineage metadata into them
+                        if file_dict is not None and isinstance(file_dict, dict):
+                            if 'attributes_dict' in file_dict:
+                                file_dict['attributes_dict'].update(data_lineage_dict.get(filename,{}))
                        stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)

--- a/utils/g5505_utils.py
+++ b/utils/g5505_utils.py
@@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame):
                dtype.append((col, 'i4'))  # Assuming 32-bit integer
            elif pd.api.types.is_float_dtype(col_dtype):
                dtype.append((col, 'f4'))  # Assuming 32-bit float
+            elif pd.api.types.is_bool_dtype(col_dtype):
+                dtype.append((col,bool))
            else:
                # Handle unsupported data types
                print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")