""" Utilities for assembly and conversion of HED strings to different forms. """
from functools import partial
import pandas as pd
from hed.models.tabular_input import TabularInput
from hed.models.hed_string import HedString
from hed.models.definition_dict import DefinitionDict
def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
    """ Assemble one HedString per entry of the tabular input's assembled series.

    Parameters:
        tabular_file (TabularInput): Represents the tabular input file.
        hed_schema (HedSchema): The schema passed to the definition lookup and to each HedString.
        extra_def_dicts (list of DefinitionDict or None): Any extra DefinitionDict objects
            forwarded to tabular_file.get_def_dict.
        defs_expanded (bool): (Default True) Call expand_defs() on each string if True,
            otherwise call shrink_defs().

    Returns:
        tuple:
            hed_strings (list): The result of expand_defs()/shrink_defs() for each entry
                of tabular_file.series_a, in order.
            def_dict (DefinitionDict): The definitions returned by tabular_file.get_def_dict.
    """
    def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)

    # Both modes build the same HedString; only the final transformation differs.
    transform_name = "expand_defs" if defs_expanded else "shrink_defs"

    assembled = []
    for raw_entry in tabular_file.series_a:
        hed_string = HedString(raw_entry, hed_schema, def_dict)
        assembled.append(getattr(hed_string, transform_name)())
    return assembled, def_dict
def shrink_defs(df, hed_schema, columns=None):
    """ Shrink (in place) any def-expand tags found in the specified columns in the dataframe.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema or None): The schema to use to identify defs.
        columns (list or None): The columns to modify on the dataframe.
            If None, every column of the dataframe is processed.

    Notes:
        - Only entries containing 'Def-expand/' (case-insensitive) are rewritten.
        - Missing (NaN) entries are treated as non-matching and left untouched.
    """
    shrink = partial(_shrink_defs, hed_schema=hed_schema)

    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean when the series has missing values.
        mask = df.str.contains('Def-expand/', case=False, na=False)
        df[mask] = df[mask].apply(shrink)
        return

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def-expand/', case=False, na=False)
        # Assign through .loc: chained indexing (df[column][mask] = ...) may write to a
        # temporary copy and leave the caller's dataframe unchanged.
        df.loc[mask, column] = df.loc[mask, column].apply(shrink)
def expand_defs(df, hed_schema, def_dict, columns=None):
    """ Expand (in place) any def tags found in the specified columns in the dataframe.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema or None): The schema to use to identify defs.
        def_dict (DefinitionDict): The definitions to expand.
        columns (list or None): The columns to modify on the dataframe.
            If None, every column of the dataframe is processed.

    Notes:
        - Only entries containing 'Def/' (case-insensitive) are rewritten.
        - Missing (NaN) entries are treated as non-matching and left untouched.
    """
    expand = partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)

    if isinstance(df, pd.Series):
        # na=False keeps the mask strictly boolean when the series has missing values.
        mask = df.str.contains('Def/', case=False, na=False)
        df[mask] = df[mask].apply(expand)
        return

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def/', case=False, na=False)
        df.loc[mask, column] = df.loc[mask, column].apply(expand)
def _convert_to_form(hed_string, hed_schema, tag_form):
    """ Return the text of hed_string after converting it with HedString.get_as_form.

    Parameters:
        hed_string (str): The raw HED string to parse.
        hed_schema (HedSchema): The schema used to build the HedString.
        tag_form (str): The form name passed through to get_as_form.

    Returns:
        str: The string conversion of the get_as_form result.
    """
    parsed = HedString(hed_string, hed_schema)
    converted = parsed.get_as_form(tag_form)
    return str(converted)
def _shrink_defs(hed_string, hed_schema):
    """ Return the text of hed_string after calling HedString.shrink_defs on it.

    Parameters:
        hed_string (str): The raw HED string to parse.
        hed_schema (HedSchema): The schema used to build the HedString.

    Returns:
        str: The string conversion of the shrink_defs result.
    """
    parsed = HedString(hed_string, hed_schema)
    shrunk = parsed.shrink_defs()
    return str(shrunk)
def _expand_defs(hed_string, hed_schema, def_dict):
    """ Return the text of hed_string after calling HedString.expand_defs on it.

    Parameters:
        hed_string (str): The raw HED string to parse.
        hed_schema (HedSchema): The schema used to build the HedString.
        def_dict (DefinitionDict): The definitions handed to the HedString constructor.

    Returns:
        str: The string conversion of the expand_defs result.
    """
    parsed = HedString(hed_string, hed_schema, def_dict)
    expanded = parsed.expand_defs()
    return str(expanded)
def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
    """ Run a DefExpandGatherer over the given HED strings and return its result.

    Parameters:
        hed_strings (list or pd.Series): A list of HED strings to process.
        hed_schema (HedSchema): The schema to use.
        known_defs (DefinitionDict or list or str or None):
            Known definitions going in, forwarded unchanged to DefExpandGatherer.
        ambiguous_defs (dict): Ambiguous definitions, forwarded unchanged to DefExpandGatherer.

    Returns:
        The value returned by DefExpandGatherer.process_def_expands
        (NOTE(review): previously documented as a tuple of the DefinitionDict,
        ambiguous definitions, and errors -- confirm against DefExpandGatherer).
    """
    # Imported at call time rather than module level; presumably to avoid a circular import.
    from hed.models.def_expand_gather import DefExpandGatherer

    return DefExpandGatherer(hed_schema, known_defs, ambiguous_defs).process_def_expands(hed_strings)
def sort_dataframe_by_onsets(df):
    """ Return a copy of the dataframe sorted by the numeric value of its onset column.

    Parameters:
        df (pd.Dataframe): Dataframe to sort.

    Returns:
        pd.Dataframe: A sorted copy of the dataframe, or the original dataframe (not a copy)
            if it didn't have an onset column.

    Notes:
        - The onset column itself is left untouched (e.g. kept as strings); only a temporary
          float version is used for ordering.
        - The sort is stable, so rows sharing the same onset keep their original relative order.
        - The original index labels are preserved on the returned dataframe.
    """
    if "onset" not in df.columns:
        return df

    # Sort on a throwaway float column so string onsets order numerically ("10" after "2").
    sorted_df = df.copy()
    sorted_df['_temp_onset_sort'] = sorted_df['onset'].astype(float)
    # mergesort is stable; the default quicksort may reorder rows with equal onsets.
    sorted_df.sort_values(by='_temp_onset_sort', kind='mergesort', inplace=True)
    sorted_df.drop(columns=['_temp_onset_sort'], inplace=True)
    return sorted_df