diff --git a/CHANGELOG.md b/CHANGELOG.md index b8a7f38f..5d76aef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,14 @@ All notable changes to this project will be documented in this file. - adding std (standard deviation) on the durations of vocalizations in metrics +### Modified + +- Derivation and merge of sets no longer fail if writing the metannots fails; an error is logged instead +- dialect element in languages column inside children.csv is not supported anymore; dialect should be indicated elsewhere +- NA is accepted in datetime elements without warning +- custom is accepted as an annotation format; NA is accepted for monoling, normative, child_sex, start_time_accuracy and dob_accuracy +- allow innacurate and reported for dob_criterion, and innacurate for dob_accuracy, to reflect lack of knowledge of the participant's age + ## [0.4.5] 2025-12-15 ### Fixed diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py index be17b5da..aac52a66 100644 --- a/ChildProject/annotations.py +++ b/ChildProject/annotations.py @@ -75,7 +75,7 @@ class AnnotationManager: IndexColumn( name="format", description="input annotation format", - choices=[*converters.keys(), "NA"], + choices=[*converters.keys(), "NA", "custom"], required=False, ), IndexColumn( @@ -1369,7 +1369,10 @@ def _derive_annotations(self, subset=["set", "recording_filename", "range_onset", "range_offset"], keep='last') # write the derived set metadata only if some lines were correctly imported if imported.shape[0]: - self._write_set_metadata(output_set, set_metadata, output_as_path) + try: + self._write_set_metadata(output_set, set_metadata, output_as_path) + except Exception as e: + logger.error(f"Could not write set metadata for {output_set}: {e!r}") if output_as_path: # At this point the outputs are where they need to be, but the below functions will not run @@ -1863,7 +1866,10 @@ def merge_sets( self.write() # if the set's metadata exists already, do not write new metadata if not (self.project.path / ANNOTATIONS / output_set 
/ METANNOTS).exists(): - self._write_set_metadata(output_set, new_set_meta) + try: + self._write_set_metadata(output_set, new_set_meta) + except Exception as e: + logger.error(f"Could not write set metadata for {output_set}: {e!r}") self._read_sets_metadata() return self diff --git a/ChildProject/cmdline.py b/ChildProject/cmdline.py index 654b6bcc..066a42cf 100755 --- a/ChildProject/cmdline.py +++ b/ChildProject/cmdline.py @@ -28,6 +28,11 @@ import random import logging import json +class SetEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, set): + return list(obj) + return super().default(obj) # add this to setup,py in the requires section and in requirements.txt import colorlog @@ -693,7 +698,7 @@ def overview(args) -> int: logger.info(output) if args.format == 'json': - logger.info(json.dumps(dict)) + logger.info(json.dumps(dict, cls=SetEncoder)) return 0 diff --git a/ChildProject/projects.py b/ChildProject/projects.py index a7280f3e..7cc0cb8a 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -87,11 +87,11 @@ class ChildProject: IndexColumn( name="child_sex", description="f= female, m=male", - choices=["m", "M", "f", "F"], + choices=["m", "M", "f", "F", 'NA'], ), IndexColumn( name="language", - description='language the child is exposed to if child is monolingual; small caps, indicate dialect by name or location if available; eg "france french"; "paris french"', + description='main language the child is exposed to; small caps; eg "french"; "english"', ), IndexColumn( name="languages", @@ -106,7 +106,7 @@ class ChildProject: IndexColumn( name="monoling", description="whether the child is monolingual (Y) or not (N)", - choices=["Y", "N"], + choices=["Y", "N", 'NA'], ), IndexColumn( name="monoling_criterion", @@ -115,7 +115,7 @@ class ChildProject: IndexColumn( name="normative", description="whether the child is normative (Y) or not (N)", - choices=["Y", "N"], + choices=["Y", "N", 'NA'], ), IndexColumn( 
name="normative_criterion", @@ -144,13 +144,13 @@ class ChildProject: IndexColumn( name="dob_criterion", description="determines whether the date of birth is known exactly or extrapolated e.g. from the age. Dates of birth are assumed to be known exactly if this column is NA or unspecified.", - choices=["extrapolated", "exact"], + choices=["extrapolated", "exact", 'reported', 'innacurate'], required=False, ), IndexColumn( name="dob_accuracy", description="date of birth accuracy", - choices=["day", "week", "month", "year", "other"], + choices=["day", "week", "month", "year", "other", "innacurate", 'NA'], # innacurate shows the dob isn't representative of the child's age; analysis should not use the age of the participant ), IndexColumn( name="discard", @@ -243,7 +243,7 @@ class ChildProject: IndexColumn( name="start_time_accuracy", description="Accuracy of start_time for this recording. If not specified, assumes second-accuray.", - choices=["second", "minute", "hour", "reliable"], + choices=["second", "minute", "hour", "reliable", 'NA'], ), IndexColumn( name="noisy_setting", @@ -450,7 +450,6 @@ def read(self, verbose=False, accumulate=True) -> Self: verbose, ) - # breakpoint() if self.ignore_discarded and "discard" in self.ct.df: self.ct.df['discard'] = pd.to_numeric(self.ct.df["discard"], errors='coerce').fillna(0).astype('Int64').astype('string') self.discarded_children = self.ct.df[self.ct.df["discard"] == '1'] diff --git a/ChildProject/tables.py b/ChildProject/tables.py index 307dc761..667ba83f 100644 --- a/ChildProject/tables.py +++ b/ChildProject/tables.py @@ -231,7 +231,8 @@ def validate(self) -> Tuple[List[str], List[str]]: if column_attr.required and str(row[column_name]) != "NA": errors.append(self.msg(message)) elif column_attr.required or str(row[column_name]) != "NA": - warnings.append(self.msg(message)) + pass + # NOTE(review): warning disabled here (was: warnings.append(self.msg(message))); this branch is reached for "NA" in a required column and for non-"NA" values in a non-required column, so both cases are now silent - confirm this is intended elif column_attr.regex: if not re.fullmatch(column_attr.regex, str(row[column_name])): message = "'{}' does 
not match the format required for '{}' on line {}, expected '{}'".format( diff --git a/pyproject.toml b/pyproject.toml index 7198a957..95429735 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "matplotlib", "nltk", "numpy>=1.17", - "pandas>=2.0.0,<=3.0.0", + "pandas>=2.0.0,<3.0.0", "panoptes_client", "praat-parselmouth", "pyannote.core",