From 5c17453ad708ac8bccf544c65f74931057eeb667 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:31:04 +0100 Subject: [PATCH 1/5] catch yaml.dump errors in derivation and merge to avoid failing a working merge or derivation --- CHANGELOG.md | 4 ++++ ChildProject/annotations.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8a7f38f..2510fd17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ All notable changes to this project will be documented in this file. - adding std (standard deviation) on the durations of vocalizations in metrics +### Modified + +- Derivation and merge of sets won't fail if the writing of metannots fails, it will simply issue an error in log + ## [0.4.5] 2025-12-15 ### Fixed diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py index be17b5da..d2839b4b 100644 --- a/ChildProject/annotations.py +++ b/ChildProject/annotations.py @@ -1369,7 +1369,10 @@ def _derive_annotations(self, subset=["set", "recording_filename", "range_onset", "range_offset"], keep='last') # write the derived set metadata only if some lines were correctly imported if imported.shape[0]: - self._write_set_metadata(output_set, set_metadata, output_as_path) + try: + self._write_set_metadata(output_set, set_metadata, output_as_path) + except Exception as e: + logger.error(f"Could not write set metadata for {output_set}") if output_as_path: # At this point the outputs are where they need to be, but the below functions will not run @@ -1863,7 +1866,10 @@ def merge_sets( self.write() # if the set's metadata exists already, do not write new metadata if not (self.project.path / ANNOTATIONS / output_set / METANNOTS).exists(): - self._write_set_metadata(output_set, new_set_meta) + try: + self._write_set_metadata(output_set, new_set_meta) + except Exception as e: + logger.error(f"Could not write set metadata for {output_set}") self._read_sets_metadata() return self From 
a26469db41d34153aba1676906e59afd7cc077f3 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:38:03 +0100 Subject: [PATCH 2/5] some validation formats changes --- CHANGELOG.md | 3 +++ ChildProject/annotations.py | 2 +- ChildProject/projects.py | 2 +- ChildProject/tables.py | 3 ++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2510fd17..bdbfd218 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ All notable changes to this project will be documented in this file. ### Modified - Derivation and merge of sets won't fail if the writing of metannots fails, it will simply issue an error in log +- dialect element in languages column inside children.csv is not supported anymore, dialect should be indicated elsewhere +- NA is accepted in datetime elements without warning +- custom is accepted as an annotation format ## [0.4.5] 2025-12-15 diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py index d2839b4b..aac52a66 100644 --- a/ChildProject/annotations.py +++ b/ChildProject/annotations.py @@ -75,7 +75,7 @@ class AnnotationManager: IndexColumn( name="format", description="input annotation format", - choices=[*converters.keys(), "NA"], + choices=[*converters.keys(), "NA", "custom"], required=False, ), IndexColumn( diff --git a/ChildProject/projects.py b/ChildProject/projects.py index a7280f3e..b72c0483 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -91,7 +91,7 @@ class ChildProject: ), IndexColumn( name="language", - description='language the child is exposed to if child is monolingual; small caps, indicate dialect by name or location if available; eg "france french"; "paris french"', + description='main language the child is exposed to; small caps; eg "french"; "english"', ), IndexColumn( name="languages", diff --git a/ChildProject/tables.py b/ChildProject/tables.py index 307dc761..667ba83f 100644 --- a/ChildProject/tables.py +++ b/ChildProject/tables.py @@ 
-231,7 +231,8 @@ def validate(self) -> Tuple[List[str], List[str]]: if column_attr.required and str(row[column_name]) != "NA": errors.append(self.msg(message)) elif column_attr.required or str(row[column_name]) != "NA": - warnings.append(self.msg(message)) + pass + #warnings.append(self.msg(message)) elif column_attr.regex: if not re.fullmatch(column_attr.regex, str(row[column_name])): message = "'{}' does not match the format required for '{}' on line {}, expected '{}'".format( From 5493be1f90556f744c56c0b174a9f96eb7e5cdb5 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:48:12 +0100 Subject: [PATCH 3/5] allow different format changes --- CHANGELOG.md | 3 ++- ChildProject/projects.py | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdbfd218..5d76aef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ All notable changes to this project will be documented in this file. - Derivation and merge of sets won't fail if the writing of metannots fails, it will simply issue an error in log - dialect element in languages column inside children.csv is not supported anymore, dialect should be indicated elsewhere - NA is accepted in datetime elements without warning -- custom is accepted as an annotation format +- custom is accepted as an annotation format; NA is accepted as a value for monoling, normative, child_sex, start_time_accuracy and dob_accuracy +- allow innacurate and reported for dob_criterion / accuracy to reflect lack of knowledge of the participant's age ## [0.4.5] 2025-12-15 diff --git a/ChildProject/projects.py b/ChildProject/projects.py index b72c0483..daa35e17 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -87,7 +87,7 @@ class ChildProject: IndexColumn( name="child_sex", description="f= female, m=male", - choices=["m", "M", "f", "F"], + choices=["m", "M", "f", "F", 'NA'], ), IndexColumn( name="language", @@ -106,7 +106,7 @@ class ChildProject: IndexColumn(
name="monoling", description="whether the child is monolingual (Y) or not (N)", - choices=["Y", "N"], + choices=["Y", "N", 'NA'], ), IndexColumn( name="monoling_criterion", @@ -115,7 +115,7 @@ class ChildProject: IndexColumn( name="normative", description="whether the child is normative (Y) or not (N)", - choices=["Y", "N"], + choices=["Y", "N", 'NA'], ), IndexColumn( name="normative_criterion", @@ -144,13 +144,13 @@ class ChildProject: IndexColumn( name="dob_criterion", description="determines whether the date of birth is known exactly or extrapolated e.g. from the age. Dates of birth are assumed to be known exactly if this column is NA or unspecified.", - choices=["extrapolated", "exact"], + choices=["extrapolated", "exact", 'reported', 'innacurate'], required=False, ), IndexColumn( name="dob_accuracy", description="date of birth accuracy", - choices=["day", "week", "month", "year", "other"], + choices=["day", "week", "month", "year", "other", "innacurate", 'NA'], # innacurate shows the dob isn't representative of the child's age; analysis should not use the age of the participant ), IndexColumn( name="discard", @@ -243,7 +243,7 @@ class ChildProject: IndexColumn( name="start_time_accuracy", description="Accuracy of start_time for this recording. 
If not specified, assumes second-accuray.", - choices=["second", "minute", "hour", "reliable"], + choices=["second", "minute", "hour", "reliable", 'NA'], ), IndexColumn( name="noisy_setting", From 98c6cbaff8a97445c9ca88e213c2156e7366b256 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:55:24 +0100 Subject: [PATCH 4/5] json dumps set to list --- ChildProject/cmdline.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ChildProject/cmdline.py b/ChildProject/cmdline.py index 654b6bcc..066a42cf 100755 --- a/ChildProject/cmdline.py +++ b/ChildProject/cmdline.py @@ -28,6 +28,11 @@ import random import logging import json +class SetEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, set): + return list(obj) + return json.JSONEncoder.default(self, obj) # add this to setup,py in the requires section and in requirements.txt import colorlog @@ -693,7 +698,7 @@ def overview(args) -> int: logger.info(output) if args.format == 'json': - logger.info(json.dumps(dict)) + logger.info(json.dumps(dict, cls=SetEncoder)) return 0 From 69f392baa9340aed89dac79ea2c000303af48cac Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Mon, 2 Feb 2026 12:00:21 +0100 Subject: [PATCH 5/5] do not allow pandas >3.0 yet --- ChildProject/projects.py | 1 - pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ChildProject/projects.py b/ChildProject/projects.py index daa35e17..7cc0cb8a 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -450,7 +450,6 @@ def read(self, verbose=False, accumulate=True) -> Self: verbose, ) - # breakpoint() if self.ignore_discarded and "discard" in self.ct.df: self.ct.df['discard'] = pd.to_numeric(self.ct.df["discard"], errors='coerce').fillna(0).astype('Int64').astype('string') self.discarded_children = self.ct.df[self.ct.df["discard"] == '1'] diff --git a/pyproject.toml b/pyproject.toml index 7198a957..95429735 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ 
-13,7 +13,7 @@ dependencies = [ "matplotlib", "nltk", "numpy>=1.17", - "pandas>=2.0.0,<=3.0.0", + "pandas>=2.0.0,<3.0.0", "panoptes_client", "praat-parselmouth", "pyannote.core",