Source code for hed.models.df_util

""" Utilities for assembly and conversion of HED strings to different forms. """
from functools import partial
import pandas as pd
from hed.models.tabular_input import TabularInput
from hed.models.hed_string import HedString
from hed.models.definition_dict import DefinitionDict


def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
    """ Assemble one HedString result per entry of the tabular file's HED series.

    Parameters:
        tabular_file (TabularInput): Represents the tabular input file.
        hed_schema (HedSchema): The schema passed to get_def_dict and to every HedString that is built.
        extra_def_dicts: list of DefinitionDict, optional
            Any extra DefinitionDict objects forwarded to tabular_file.get_def_dict.
        defs_expanded (bool): (Default True) Calls expand_defs() on each string if True, otherwise shrink_defs().

    Returns:
        tuple:
            hed_strings(list): One entry per element of tabular_file.series_a, holding the value returned
                               by expand_defs()/shrink_defs() (presumably the HedString itself -- confirm).
            def_dict(DefinitionDict): The definitions returned by tabular_file.get_def_dict.

    """
    def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)

    assembled = []
    for row_text in tabular_file.series_a:
        hed_string = HedString(row_text, hed_schema, def_dict)
        if defs_expanded:
            assembled.append(hed_string.expand_defs())
        else:
            assembled.append(hed_string.shrink_defs())
    return assembled, def_dict
def convert_to_form(df, hed_schema, tag_form, columns=None):
    """ Rewrite every HED string in the dataframe or series into the requested tag form (in place).

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema): The schema to use to convert tags.
        tag_form(str): HedTag property to convert tags to.
        columns (list): The columns to modify on the dataframe. If None, every column is converted.
                        Ignored when df is a series.

    """
    converter = partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)

    if isinstance(df, pd.Series):
        # Slice assignment keeps the caller's Series object and overwrites its values.
        df[:] = df.apply(converter)
        return

    target_columns = df.columns if columns is None else columns
    for name in target_columns:
        df[name] = df[name].apply(converter)
def shrink_defs(df, hed_schema, columns=None):
    """ Shrink (in place) any def-expand tags found in the specified columns in the dataframe.

    Only entries whose text contains 'Def-expand/' (case-insensitive) are re-parsed; all other
    entries are left untouched.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema or None): The schema to use to identify defs.
        columns (list or None): The columns to modify on the dataframe. If None, every column is processed.
                                Ignored when df is a series.

    """
    shrinker = partial(_shrink_defs, hed_schema=hed_schema)

    if isinstance(df, pd.Series):
        mask = df.str.contains('Def-expand/', case=False)
        df[mask] = df[mask].apply(shrinker)
        return

    if columns is None:
        columns = df.columns

    for column in columns:
        mask = df[column].str.contains('Def-expand/', case=False)
        # Assign through a single .loc call. The chained form df[column][mask] = ... writes to an
        # intermediate object, which raises SettingWithCopyWarning and leaves df unchanged when
        # pandas Copy-on-Write is active.
        df.loc[mask, column] = df.loc[mask, column].apply(shrinker)
def expand_defs(df, hed_schema, def_dict, columns=None):
    """ Expand (in place) any def tags found in the dataframe or series.

    Only entries whose text contains 'Def/' (case-insensitive) are re-parsed; all other
    entries are left untouched.

    Parameters:
        df (pd.Dataframe or pd.Series): The dataframe or series to modify.
        hed_schema (HedSchema or None): The schema to use to identify defs.
        def_dict (DefinitionDict): The definitions to expand.
        columns (list or None): The columns to modify on the dataframe. If None, every column is processed.
                                Ignored when df is a series.

    """
    expander = partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)

    if isinstance(df, pd.Series):
        has_def = df.str.contains('Def/', case=False)
        df[has_def] = df[has_def].apply(expander)
        return

    target_columns = df.columns if columns is None else columns
    for name in target_columns:
        has_def = df[name].str.contains('Def/', case=False)
        expanded = df.loc[has_def, name].apply(expander)
        df.loc[has_def, name] = expanded
def _convert_to_form(hed_string, hed_schema, tag_form):
    """ Parse one HED string and return the text of its get_as_form(tag_form) result. """
    parsed = HedString(hed_string, hed_schema)
    converted = parsed.get_as_form(tag_form)
    return str(converted)


def _shrink_defs(hed_string, hed_schema):
    """ Parse one HED string and return the text of its shrink_defs() result. """
    parsed = HedString(hed_string, hed_schema)
    shrunk = parsed.shrink_defs()
    return str(shrunk)


def _expand_defs(hed_string, hed_schema, def_dict):
    """ Parse one HED string with the given definitions and return the text of its expand_defs() result. """
    parsed = HedString(hed_string, hed_schema, def_dict)
    expanded = parsed.expand_defs()
    return str(expanded)
def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
    """ Collect def-expand tags from the strings and compare them against the known definitions.

    This is a thin wrapper: it builds a DefExpandGatherer and returns whatever its
    process_def_expands method returns.

    Parameters:
        hed_strings (list or pd.Series): A list of HED strings to process.
        hed_schema (HedSchema): The schema to use.
        known_defs (DefinitionDict or list or str or None):
            A DefinitionDict or anything its constructor takes. These are the known definitions going in,
            that must match perfectly.
        ambiguous_defs (dict): A dictionary containing ambiguous definitions.
            format TBD. Currently def name key: list of lists of HED tags values

    Returns:
        tuple: A tuple containing the DefinitionDict, ambiguous definitions, and errors.

    """
    # Imported at call time rather than module level (presumably to avoid a circular import -- confirm).
    from hed.models.def_expand_gather import DefExpandGatherer

    gatherer = DefExpandGatherer(hed_schema, known_defs, ambiguous_defs)
    result = gatherer.process_def_expands(hed_strings)
    return result
def sort_dataframe_by_onsets(df):
    """ Return a copy of the dataframe with rows ordered by the numeric value of the 'onset' column.

    The onset values are compared as floats, but the returned dataframe keeps the onset column
    exactly as it was stored (e.g. as strings). Rows that share the same onset keep their
    original relative order. The input dataframe is not modified.

    Parameters:
        df(pd.Dataframe): Dataframe to sort.

    Returns:
        The sorted dataframe, or the original dataframe if it didn't have an onset column.

    """
    if "onset" not in df.columns:
        return df

    # Sort on a temporary float column so "10.0" orders after "2.5", then discard it.
    sorted_df = df.copy()
    sorted_df['_temp_onset_sort'] = sorted_df['onset'].astype(float)
    # mergesort is stable: the default quicksort may shuffle rows that have identical onsets.
    sorted_df = sorted_df.sort_values(by='_temp_onset_sort', kind='mergesort')
    return sorted_df.drop(columns=['_temp_onset_sort'])