Code indexing in Gitaly is broken, which leads to code not being visible to the user. We are working on the issue with the highest priority.

Skip to content
Snippets Groups Projects
Commit 6dc89d34 authored by florez_j
Browse files

Commented out metadata info about group members for a given group. This is to...

Commented out the metadata info about group members for a given group. This simplifies the YAML or JSON representation of the metadata.
parent 60f9278f
No related branches found
No related tags found
No related merge requests found
......@@ -459,50 +459,51 @@ def get_parent_child_relationships(file: h5py.File):
def __print_metadata__(name, obj, folder_depth, yaml_dict):
# TODO: should we enable deeper folders ?
if len(obj.name.split('/')) <= folder_depth:
"""
Extracts metadata from HDF5 groups and datasets and organizes them into a dictionary with compact representation.
Parameters:
-----------
name (str): Name of the HDF5 object being inspected.
obj (h5py.Group or h5py.Dataset): The HDF5 object (Group or Dataset).
folder_depth (int): Maximum depth of folders to explore.
yaml_dict (dict): Dictionary to populate with metadata.
"""
# Process only objects within the specified folder depth
if len(obj.name.split('/')) <= folder_depth: # and ".h5" not in obj.name:
name_to_list = obj.name.split('/')
name_head = name_to_list[-1]
if isinstance(obj,h5py.Group):
#print('name:', obj.name)
#print('attributes:', dict(obj.attrs))
#attr_dict = {}
group_dict = {}
name_head = name_to_list[-1] if not name_to_list[-1]=='' else obj.name
# Convert attribute dict to a YAML/JSON serializable dict
if isinstance(obj, h5py.Group): # Handle groups
# Convert attributes to a YAML/JSON serializable format
attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
#for key, value in obj.attrs.items():
#print (key, value.dtype)
# if key == 'Layout':
# print(value)
# Initialize the group dictionary
group_dict = {"name": name_head, "attributes": attr_dict}
# if not key in ['file_list','filtered_file_list']:
# Handle group members compactly
#subgroups = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Group)]
#datasets = [member_name for member_name in obj if isinstance(obj[member_name], h5py.Dataset)]
# value = make_dtype_yaml_compatible(value)
# attr_dict[key] = {'rename_as' : key,
# 'value' : value
# }
#group_dict[obj.name] = {'name': obj.name, 'attributes': attr_dict}
group_dict = {"name": name_head, "attributes": attr_dict, "datasets":{}}
#group_dict[obj.name]["name"] = obj.name
#group_dict[obj.name]["attributes"] = attr_dict
#group_dict[obj.name]["datasets"] = {}
#print(name)
# Summarize groups and datasets
#group_dict["content_summary"] = {
# "group_count": len(subgroups),
# "group_preview": subgroups[:3] + (["..."] if len(subgroups) > 3 else []),
# "dataset_count": len(datasets),
# "dataset_preview": datasets[:3] + (["..."] if len(datasets) > 3 else [])
#}
yaml_dict[obj.name] = group_dict
elif isinstance(obj, h5py.Dataset):
# Convert attribute dict to a YAML/JSON serializable dict
attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
parent_name = '/'.join(name_to_list[:-1])
yaml_dict[parent_name]["datasets"][name_head] = {"rename_as": name_head ,"attributes": attr_dict}
#print(yaml.dump(group_dict,sort_keys=False))
#elif len(obj.name.split('/')) == 3:
# print(yaml.dump())
elif isinstance(obj, h5py.Dataset): # Handle datasets
# Convert attributes to a YAML/JSON serializable format
attr_dict = {key: utils.to_serializable_dtype(val) for key, val in obj.attrs.items()}
dataset_dict = {"name": name_head, "attributes": attr_dict}
yaml_dict[obj.name] = dataset_dict
def serialize_metadata(input_filename_path, folder_depth: int = 4, output_format: str = 'yaml') -> str:
"""
......@@ -537,12 +538,13 @@ def serialize_metadata(input_filename_path, folder_depth: int = 4, output_format
# Open the HDF5 file and extract metadata
with h5py.File(input_filename_path, 'r') as f:
# Convert attribute dict to a YAML/JSON serializable dict
attrs_dict = {key: utils.to_serializable_dtype(val) for key, val in f.attrs.items()}
yaml_dict[f.name] = {
"name": f.name,
"attributes": attrs_dict,
"datasets": {}
}
#attrs_dict = {key: utils.to_serializable_dtype(val) for key, val in f.attrs.items()}
#yaml_dict[f.name] = {
# "name": f.name,
# "attributes": attrs_dict,
# "datasets": {}
#}
__print_metadata__(f.name, f, folder_depth, yaml_dict)
# Traverse HDF5 file hierarchy and add datasets
f.visititems(lambda name, obj: __print_metadata__(name, obj, folder_depth, yaml_dict))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment