Code indexing in gitaly is broken and leads to code not being visible to the user. We work on the issue with highest priority.

Skip to content
Snippets Groups Projects

Make file reader selection case insensitive by using ext.lower() and update...

Closed florez_j requested to merge feature/DB_for_FileReader_Repo into main
7 files
+ 494
84
Compare changes
  • Side-by-side
  • Inline
Files
7
+ 101
0
import sys
import os
try:
thisFilePath = os.path.abspath(__file__)
except NameError:
print("Error: __file__ is not available. Ensure the script is being run from a file.")
print("[Notice] Path to DIMA package may not be resolved properly.")
thisFilePath = os.getcwd() # Use current directory or specify a default
dimaPath = os.path.normpath(os.path.join(thisFilePath, "..",'..','..')) # Move up to project root
if dimaPath not in sys.path: # Avoid duplicate entries
sys.path.insert(0,dimaPath)
import pandas as pd
import collections
import json
import h5py
import argparse
import logging
import utils.g5505_utils as utils
def read_jsonflag_as_dict(path_to_file):
file_dict = {}
path_tail, path_head = os.path.split(path_to_file)
file_dict['name'] = path_head
# TODO: review this header dictionary, it may not be the best way to represent header data
file_dict['attributes_dict'] = {}
file_dict['datasets'] = []
try:
with open(path_to_file, 'r') as stream:
flag = json.load(stream)#, Loader=json.FullLoader)
except (FileNotFoundError, json.JSONDecodeError) as exc:
print(exc)
dataset = {}
dataset['name'] = 'data_table'#_numerical_variables'
dataset['data'] = utils.convert_attrdict_to_np_structured_array(flag) #df_numerical_attrs.to_numpy()
dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset)
return file_dict
if __name__ == "__main__":
from src.hdf5_ops import save_file_dict_to_hdf5
from utils.g5505_utils import created_at
# Set up argument parsing
parser = argparse.ArgumentParser(description="Data ingestion process to HDF5 files.")
parser.add_argument('dst_file_path', type=str, help="Path to the target HDF5 file.")
parser.add_argument('src_file_path', type=str, help="Relative path to source file to be saved to target HDF5 file.")
parser.add_argument('dst_group_name', type=str, help="Group name '/instFolder/[category]/fileName' in the target HDF5 file.")
args = parser.parse_args()
hdf5_file_path = args.dst_file_path
src_file_path = args.src_file_path
dst_group_name = args.dst_group_name
default_mode = 'r+'
try:
# Read source file and return an internal dictionary representation
idr_dict = read_jsonflag_as_dict(src_file_path)
if not os.path.exists(hdf5_file_path):
default_mode = 'w'
print(f'Opening HDF5 file: {hdf5_file_path} in mode {default_mode}')
with h5py.File(hdf5_file_path, mode=default_mode, track_order=True) as hdf5_file_obj:
try:
# Create group if it does not exist
if dst_group_name not in hdf5_file_obj:
hdf5_file_obj.create_group(dst_group_name)
hdf5_file_obj[dst_group_name].attrs['creation_date'] = created_at().encode('utf-8')
print(f'Created new group: {dst_group_name}')
else:
print(f'Group {dst_group_name} already exists. Proceeding with data transfer...')
except Exception as inst:
logging.error('Failed to create group %s in HDF5: %s', dst_group_name, inst)
# Save dictionary to HDF5
save_file_dict_to_hdf5(hdf5_file_obj, dst_group_name, idr_dict)
print(f'Completed saving file dict with keys: {idr_dict.keys()}')
except Exception as e:
logging.error('File reader failed to process %s: %s', src_file_path, e)
print(f'File reader failed to process {src_file_path}. See logs for details.')
Loading