LAAC-LSCP · LoannPeurey · Feb 2, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,14 @@ All notable changes to this project will be documented in this file.
 
 - adding std (standard deviation) on the durations of vocalizations in metrics
 
+### Modified
+
+- Derivation and merging of sets no longer fail when writing the set metadata (metannots) fails; an error is logged instead
+- the dialect element in the languages column of children.csv is no longer supported; dialect should be indicated elsewhere
+- NA is accepted in datetime elements without warning
+- `custom` is accepted as an annotation format; `NA` is accepted for monoling, normative, child_sex, start_time_accuracy and dob_accuracy
+- allow `reported` and `innacurate` for dob_criterion, and `innacurate` for dob_accuracy, to reflect lack of knowledge of the participant's age
+
 ## [0.4.5] 2025-12-15
 
 ### Fixed

diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py
@@ -75,7 +75,7 @@ class AnnotationManager:
         IndexColumn(
             name="format",
             description="input annotation format",
-            choices=[*converters.keys(), "NA"],
+            choices=[*converters.keys(), "NA", "custom"],
             required=False,
         ),
         IndexColumn(
@@ -1369,7 +1369,10 @@ def _derive_annotations(self,
             subset=["set", "recording_filename", "range_onset", "range_offset"], keep='last')
         # write the derived set metadata only if some lines were correctly imported
         if imported.shape[0]:
-            self._write_set_metadata(output_set, set_metadata, output_as_path)
+            try:
+                self._write_set_metadata(output_set, set_metadata, output_as_path)
+            except Exception as e:
+                logger.error(f"Could not write set metadata for {output_set}")
 
         if output_as_path:
             # At this point the outputs are where they need to be, but the below functions will not run
@@ -1863,7 +1866,10 @@ def merge_sets(
         self.write()
         # if the set's metadata exists already, do not write new metadata
         if not (self.project.path / ANNOTATIONS / output_set / METANNOTS).exists():
-            self._write_set_metadata(output_set, new_set_meta)
+            try:
+                self._write_set_metadata(output_set, new_set_meta)
+            except Exception as e:
+                logger.error(f"Could not write set metadata for {output_set}")
         self._read_sets_metadata()
 
         return self

diff --git a/ChildProject/cmdline.py b/ChildProject/cmdline.py
@@ -28,6 +28,11 @@
 import random
 import logging
 import json
+class SetEncoder(json.JSONEncoder):  # JSON encoder that serializes set/frozenset values as lists
+    def default(self, obj):  # hook json calls for objects it cannot serialize natively
+        if isinstance(obj, (set, frozenset)):
+            return list(obj)  # element order follows the set's iteration order
+        return super().default(obj)  # defer to the base class, which raises TypeError
 
 # add this to setup,py in the requires section and in requirements.txt
 import colorlog
@@ -693,7 +698,7 @@ def overview(args) -> int:
             logger.info(output)
 
     if args.format == 'json':
-        logger.info(json.dumps(dict))
+        logger.info(json.dumps(dict, cls=SetEncoder))
 
     return 0
 

diff --git a/ChildProject/projects.py b/ChildProject/projects.py
@@ -87,11 +87,11 @@ class ChildProject:
         IndexColumn(
             name="child_sex",
             description="f= female, m=male",
-            choices=["m", "M", "f", "F"],
+            choices=["m", "M", "f", "F", 'NA'],
         ),
         IndexColumn(
             name="language",
-            description='language the child is exposed to if child is monolingual; small caps, indicate dialect by name or location if available; eg "france french"; "paris french"',
+            description='main language the child is exposed to; small caps; eg "french"; "english"',
         ),
         IndexColumn(
             name="languages",
@@ -106,7 +106,7 @@ class ChildProject:
         IndexColumn(
             name="monoling",
             description="whether the child is monolingual (Y) or not (N)",
-            choices=["Y", "N"],
+            choices=["Y", "N", 'NA'],
         ),
         IndexColumn(
             name="monoling_criterion",
@@ -115,7 +115,7 @@ class ChildProject:
         IndexColumn(
             name="normative",
             description="whether the child is normative (Y) or not (N)",
-            choices=["Y", "N"],
+            choices=["Y", "N", 'NA'],
         ),
         IndexColumn(
             name="normative_criterion",
@@ -144,13 +144,13 @@ class ChildProject:
         IndexColumn(
             name="dob_criterion",
             description="determines whether the date of birth is known exactly or extrapolated e.g. from the age. Dates of birth are assumed to be known exactly if this column is NA or unspecified.",
-            choices=["extrapolated", "exact"],
+            choices=["extrapolated", "exact", 'reported', 'innacurate'],
             required=False,
         ),
         IndexColumn(
             name="dob_accuracy",
             description="date of birth accuracy",
-            choices=["day", "week", "month", "year", "other"],
+            choices=["day", "week", "month", "year", "other", "innacurate", 'NA'], # innacurate shows the dob isn't representative of the child's age; analysis should not use the age of the participant
         ),
         IndexColumn(
             name="discard",
@@ -243,7 +243,7 @@ class ChildProject:
         IndexColumn(
             name="start_time_accuracy",
             description="Accuracy of start_time for this recording. If not specified, assumes second-accuray.",
-            choices=["second", "minute", "hour", "reliable"],
+            choices=["second", "minute", "hour", "reliable", 'NA'],
         ),
         IndexColumn(
             name="noisy_setting",
@@ -450,7 +450,6 @@ def read(self, verbose=False, accumulate=True) -> Self:
                 verbose,
             )
 
-        # breakpoint()
         if self.ignore_discarded and "discard" in self.ct.df:
             self.ct.df['discard'] = pd.to_numeric(self.ct.df["discard"], errors='coerce').fillna(0).astype('Int64').astype('string')
             self.discarded_children = self.ct.df[self.ct.df["discard"] == '1']

diff --git a/ChildProject/tables.py b/ChildProject/tables.py
@@ -231,7 +231,8 @@ def validate(self) -> Tuple[List[str], List[str]]:
                         if column_attr.required and str(row[column_name]) != "NA":
                             errors.append(self.msg(message))
                         elif column_attr.required or str(row[column_name]) != "NA":
-                            warnings.append(self.msg(message))
+                            pass
+                            #warnings.append(self.msg(message))
                 elif column_attr.regex:
                     if not re.fullmatch(column_attr.regex, str(row[column_name])):
                         message = "'{}' does not match the format required for '{}' on line {}, expected '{}'".format(

diff --git a/pyproject.toml b/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
   "matplotlib",
   "nltk",
   "numpy>=1.17",
-  "pandas>=2.0.0,<=3.0.0",
+  "pandas>=2.0.0,<3.0.0",
   "panoptes_client",
   "praat-parselmouth",
   "pyannote.core",