From ea149e75398a6b9b37ed2697560155db60d637aa Mon Sep 17 00:00:00 2001 From: Lawrence Borst Date: Wed, 19 Nov 2025 13:37:18 +0100 Subject: [PATCH 01/13] feat: add duration std to aclew metrics --- ChildProject/pipelines/metrics.py | 16 ++++++++ ChildProject/pipelines/metricsFunctions.py | 48 ++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/ChildProject/pipelines/metrics.py b/ChildProject/pipelines/metrics.py index dd2d17f9..866149d9 100644 --- a/ChildProject/pipelines/metrics.py +++ b/ChildProject/pipelines/metrics.py @@ -595,6 +595,7 @@ def __init__( segments: Union[str, pd.DataFrame] = None, by: str = "recording_filename", threads: int = 1, + include_std: bool = False, ): self.vtc = vtc @@ -619,6 +620,14 @@ def __init__( ["simple_CTC_ph",self.vtc,pd.NA], ]) + if include_std: + METRICS = np.concatenate((np.array( + [["std_voc_dur_speaker", self.vtc,'FEM'], + ["std_voc_dur_speaker", self.vtc,'MAL'], + ["std_voc_dur_speaker", self.vtc,'OCH'], + ["std_voc_dur_speaker", self.vtc,'CHI']] + ), METRICS)) + if self.alice not in am.annotations["set"].values: print(f"The ALICE set ('{self.alice}') was not found in the index.") else: @@ -652,6 +661,13 @@ def __init__( ["cp_n",self.vcm,pd.NA], ["cp_dur",self.vcm,pd.NA], ]))) + + if include_std: + METRICS = np.concatenate((METRICS, np.array( + [["std_cry_voc_dur_speaker",self.vcm,"CHI"], + ["std_can_voc_dur_speaker",self.vcm,"CHI"], + ["std_non_can_voc_dur_speaker",self.vcm,"CHI"]] + ))) METRICS = pd.DataFrame(METRICS, columns=["callable","set","speaker"]) diff --git a/ChildProject/pipelines/metricsFunctions.py b/ChildProject/pipelines/metricsFunctions.py index 3fe2628f..3af4b9ca 100644 --- a/ChildProject/pipelines/metricsFunctions.py +++ b/ChildProject/pipelines/metricsFunctions.py @@ -207,6 +207,16 @@ def avg_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> floa return segments[segments["speaker_type"] == kwargs["speaker"]]["duration"].mean() +@metricFunction({"speaker"}, 
{"speaker_type", "duration"}, np.nan) +def std_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: + """standard deviation of duration in milliseconds of vocalizations for a given speaker type + + Required keyword arguments: + - speaker : speaker_type to use + """ + return segments[segments["speaker_type"] == kwargs["speaker"]]["duration"].std() + + def wc_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float: """number of words for a given speaker type @@ -350,6 +360,20 @@ def avg_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> return value +@metricFunction({"speaker"}, ({"speaker_type", "vcm_type", "duration"}, {'speaker_type', "child_cry_vfx_len", "cries"}), np.nan) +def std_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: + """standard deviation of duration of cry vocalizations by a given speaker type (based on vcm_type or lena cries) + + Required keyword arguments: + - speaker : speaker_type to use + """ + if 'vcm_type' in segments.columns and 'duration' in segments.columns: + value = segments.loc[(segments["speaker_type"] == kwargs["speaker"]) & + (segments["vcm_type"] == "Y")]["duration"].std() + + return value + + def can_voc_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> int: """number of canonical vocalizations for a given speaker type (based on vcm_type) @@ -395,6 +419,18 @@ def avg_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> return value +@metricFunction({"speaker"}, {"speaker_type", "vcm_type", "duration"}, np.nan) +def std_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: + """standard deviation of duration of canonical vocalizations for a given speaker type (based on vcm_type) + + Required keyword arguments: + - speaker : speaker_type to use + """ + value = segments.loc[(segments["speaker_type"] == kwargs["speaker"]) & (segments["vcm_type"] == "C")][ + "duration"].std() + 
return value + + def non_can_voc_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> int: """number of non-canonical vocalizations for a given speaker type (based on vcm_type) @@ -443,6 +479,18 @@ def avg_non_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) return value +@metricFunction({"speaker"}, {"speaker_type", "vcm_type", "duration"}, np.nan) +def std_non_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: + """standard deviation of duration of non-canonical vocalizations for a given speaker type (based on vcm_type) + + Required keyword arguments: + - speaker : speaker_type to use + """ + value = segments.loc[(segments["speaker_type"] == kwargs["speaker"]) & + (segments["vcm_type"] == "N")]["duration"].std() + return value + + @metricFunction(set(), set(), np.nan) def lp_n(segments: pd.DataFrame, duration: int, **kwargs) -> float: """linguistic proportion on the number of vocalizations for CHI (based on vcm_type or [cries,vfxs,utterances_count] if vcm_type does not exist) From 3fbf0d33303557bf530456685c118752418289cd Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Mon, 15 Dec 2025 17:57:45 +0100 Subject: [PATCH 02/13] fix extract_chunks --- ChildProject/pipelines/zooniverse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChildProject/pipelines/zooniverse.py b/ChildProject/pipelines/zooniverse.py index d70403f4..dcf401cf 100644 --- a/ChildProject/pipelines/zooniverse.py +++ b/ChildProject/pipelines/zooniverse.py @@ -215,7 +215,7 @@ def _create_spectrogram(data,sr): # audio = AudioSegment.from_file(source, start_second=chunk.onset / 1000, # duration=(chunk.offset - chunk.onset) / 1000, # channels=1) - chunk_audio = audio.fade_in(10).fade_out(10) + chunk_audio = audio[chunk.onset:chunk.offset].fade_in(10).fade_out(10) wav = os.path.join(self.destination, "chunks", chunk.getbasename("wav")) mp3 = os.path.join(self.destination, "chunks", chunk.getbasename("mp3")) From 
99d3302246366397f0e07b91a0d3819ebb0fc749 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Mon, 15 Dec 2025 17:59:04 +0100 Subject: [PATCH 03/13] changelog and version --- CHANGELOG.md | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f6849f3..808c70e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +## [0.4.5] 2025-12-15 + +### Fixed + +- Extract chunks would export the entire audio + ## [0.4.4] 2025-12-08 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index 87b53ada..7198a957 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ChildProject" -version = '0.4.4' +version = '0.4.5' dependencies = [ "colorlog", "GitPython", From c19c576b8bcf452ef3fe4653fcbdd191a3cd76d4 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Mon, 15 Dec 2025 18:30:41 +0100 Subject: [PATCH 04/13] remove macos x86_64 testing --- .github/workflows/release-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-tests.yml b/.github/workflows/release-tests.yml index fda4ebfb..93a02138 100644 --- a/.github/workflows/release-tests.yml +++ b/.github/workflows/release-tests.yml @@ -9,7 +9,7 @@ jobs: build-macos: strategy: matrix: - os: [macos-15-intel, macos-latest] + os: [macos-latest] python-version: ['3.8', '3.9', '3.10', '3.11', '3.12','3.13'] name: Tests-macos runs-on: ${{ matrix.os }} @@ -126,4 +126,4 @@ jobs: shell: micromamba-shell {0} - name: run pytest run: pytest - shell: micromamba-shell {0} \ No newline at end of file + shell: micromamba-shell {0} From 6231059e27bbd1cc10641505ab3070c86b9f72c7 Mon Sep 17 00:00:00 2001 From: Lawrence Borst Date: Sat, 10 Jan 2026 01:02:12 +0100 Subject: [PATCH 05/13] refactor derive_annotations to allow output path added a flag that lets you specify an 
output path represented by the 'output_set' Public API unchanged --- ChildProject/annotations.py | 71 +++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py index 43fbb923..be17b5da 100644 --- a/ChildProject/annotations.py +++ b/ChildProject/annotations.py @@ -789,8 +789,14 @@ def write(self) -> Self: return self - def _write_set_metadata(self, setname, metadata) -> Self: + def _write_set_metadata(self, setname, metadata, output_as_path: bool=False) -> Self: + if output_as_path: + with open(setname / METANNOTS, 'w') as stream: + yaml.dump(metadata, stream) + return self + assert setname in self.annotations['set'].unique(), f"set must exist" + with open(self.project.path / ANNOTATIONS / setname / METANNOTS, 'w') as stream: yaml.dump(metadata, stream) return self @@ -1083,6 +1089,7 @@ def _derive_annotation( derivator: Derivator, output_set: str, overwrite_existing: bool = False, + output_as_path = False, ) -> dict: """import and convert ``annotation``. This function should not be called outside of this class. 
@@ -1094,6 +1101,8 @@ def _derive_annotation( :type output_set: str :param overwrite_existing: use for lines with the same set and annotation_filename to be re-derived and overwritten :type overwrite_existing: bool + :param output_as_path: used if you want to direct your outputs to any filesystem folder, specified by `output_set` + :type output_as_path: bool :return: output annotation dictionary (attributes defined according to :ref:`ChildProject.annotations.AnnotationManager.SEGMENTS_COLUMNS`) :rtype: dict """ @@ -1106,7 +1115,10 @@ def _derive_annotation( annotation_filename = "{}_{}_{}.csv".format( source_recording, annotation["range_onset"], annotation["range_offset"] ) - output_filename = ANNOTATIONS / output_set / CONVERTED / annotation_filename + if not output_as_path: + output_filename = ANNOTATIONS / output_set / CONVERTED / annotation_filename + else: + output_filename = Path(output_set) / CONVERTED / annotation_filename # check if the annotation file already exists in dataset (same filename and same set) if self.annotations[(self.annotations['set'] == output_set) & @@ -1202,11 +1214,18 @@ def bad_derivation(annotation_dict, msg_err, error, path_file): df.sort_values(sort_columns, inplace=True) - os.makedirs( - (self.project.path / output_filename).parent, - exist_ok=True, - ) - df.to_csv(self.project.path / output_filename, index=False) + if output_as_path: + os.makedirs( + output_filename.parent, + exist_ok=True, + ) + df.to_csv(output_filename, index=False) + else: + os.makedirs( + (self.project.path / output_filename).parent, + exist_ok=True, + ) + df.to_csv(self.project.path / output_filename, index=False) annotation_result["annotation_filename"] = annotation_filename annotation_result["imported_at"] = datetime.datetime.now().strftime( @@ -1242,6 +1261,29 @@ def derive_annotations(self, :return: tuple of dataframe of derived annotations, as in :ref:`format-annotations` and dataframe of errors :rtype: tuple(pd.DataFrame, pd.DataFrame) """ + return 
self._derive_annotations( + input_set=input_set, + output_set=output_set, + derivation=derivation, + derivation_metadata=derivation_metadata, + threads=threads, + overwrite_existing=overwrite_existing, + output_as_path=False, + ) + + def _derive_annotations(self, + input_set: str, + output_set: str, + derivation: Union[str, Callable], + derivation_metadata=None, + threads: int = -1, + overwrite_existing: bool = False, + output_as_path: bool = False, + ) -> (pd.DataFrame, pd.DataFrame): + """ + Derive annotations. Same as the public routine, except specifying `output_as_path==True` + will direct your outputs to a chosen folder anywhere on the filesystem + """ input_processed = self.annotations[self.annotations['set'] == input_set].copy() assert not input_processed.empty, "Input set {0} does not exist,\ existing sets are in the 'set' column of {1}".format(input_set, ANNOTATIONS_CSV) @@ -1268,7 +1310,8 @@ def derive_annotations(self, partial(self._derive_annotation, derivator=derivator, output_set=output_set, - overwrite_existing=overwrite_existing + overwrite_existing=overwrite_existing, + output_as_path=output_as_path, ), axis=1 ).to_dict(orient="records") else: @@ -1278,7 +1321,8 @@ def derive_annotations(self, partial(self._derive_annotation, derivator=derivator, output_set=output_set, - overwrite_existing=overwrite_existing + overwrite_existing=overwrite_existing, + output_as_path=output_as_path, ), input_processed.to_dict(orient="records"), ) @@ -1325,7 +1369,14 @@ def derive_annotations(self, subset=["set", "recording_filename", "range_onset", "range_offset"], keep='last') # write the derived set metadata only if some lines were correctly imported if imported.shape[0]: - self._write_set_metadata(output_set, set_metadata) + self._write_set_metadata(output_set, set_metadata, output_as_path) + + if output_as_path: + # At this point the outputs are where they need to be, but the below functions will not run + # Until the set has been added to the dataset. 
You would have to import yourself manually + # after the fact using an automated importation (and possibly some file/folder renaming) + return imported, errors + self._read_sets_metadata() self.write() From 94709f144afb04104363c61a56ec9fd2b268bc21 Mon Sep 17 00:00:00 2001 From: LPeurey Date: Tue, 13 Jan 2026 15:08:49 +0100 Subject: [PATCH 06/13] type hints compatible with python<3.10, float is ok b/c NaN is a float also --- ChildProject/pipelines/metricsFunctions.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ChildProject/pipelines/metricsFunctions.py b/ChildProject/pipelines/metricsFunctions.py index 3af4b9ca..78103a33 100644 --- a/ChildProject/pipelines/metricsFunctions.py +++ b/ChildProject/pipelines/metricsFunctions.py @@ -208,7 +208,7 @@ def avg_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> floa @metricFunction({"speaker"}, {"speaker_type", "duration"}, np.nan) -def std_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: +def std_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float: """standard deviation of duration in milliseconds of vocalizations for a given speaker type Required keyword arguments: @@ -352,7 +352,7 @@ def avg_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> value = segments.loc[(segments["speaker_type"] == kwargs["speaker"]) & (segments["vcm_type"] == "Y")]["duration"].mean() else: - segments = segments[segments['speaker_type'] == kwargs["speaker"]] + segments = segments[(segments['speaker_type'] == kwargs["speaker"]) & (segments["child_cry_vfx_len"] > 0)] value = segments["child_cry_vfx_len"].sum() / segments["cries"].apply(lambda x: len(ast.literal_eval(x))).sum() if pd.isnull(value): @@ -361,7 +361,7 @@ def avg_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> @metricFunction({"speaker"}, ({"speaker_type", "vcm_type", "duration"}, {'speaker_type', "child_cry_vfx_len", "cries"}), 
np.nan) -def std_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: +def std_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float: """standard deviation of duration of cry vocalizations by a given speaker type (based on vcm_type or lena cries) Required keyword arguments: @@ -370,6 +370,14 @@ def std_cry_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> if 'vcm_type' in segments.columns and 'duration' in segments.columns: value = segments.loc[(segments["speaker_type"] == kwargs["speaker"]) & (segments["vcm_type"] == "Y")]["duration"].std() + else: + segments = segments[ + (segments['speaker_type'] == kwargs["speaker"]) & (segments["child_cry_vfx_len"] > 0)].copy() + segments['cry_dur'] = segments["child_cry_vfx_len"] / segments["cries"].apply( + lambda x: len(ast.literal_eval(x))) # split duration of cry in the same voc + segments['num'] = segments["cries"].apply(lambda x: ast.literal_eval(x)) # have a array to explode + segments = segments.explode('num') + value = segments['cry_dur'].std() return value @@ -420,7 +428,7 @@ def avg_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> @metricFunction({"speaker"}, {"speaker_type", "vcm_type", "duration"}, np.nan) -def std_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: +def std_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float: """standard deviation of duration of canonical vocalizations for a given speaker type (based on vcm_type) Required keyword arguments: @@ -480,7 +488,7 @@ def avg_non_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) @metricFunction({"speaker"}, {"speaker_type", "vcm_type", "duration"}, np.nan) -def std_non_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float | None: +def std_non_can_voc_dur_speaker(segments: pd.DataFrame, duration: int, **kwargs) -> float: """standard deviation of duration of 
non-canonical vocalizations for a given speaker type (based on vcm_type) Required keyword arguments: From 905a66aa2cc60d85489ebb619447fde725324189 Mon Sep 17 00:00:00 2001 From: LPeurey Date: Wed, 14 Jan 2026 11:59:13 +0100 Subject: [PATCH 07/13] changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fba47d6..126897e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,13 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- adding std (standard deviation) on the durations of vocalizations in metrics + ## [0.4.3] 2025-11-19 -## Fixed +### Fixed - in certain cases the acoustics derivation would fail b/c of a dataframe casting - empty segments going through the acoustics derivation would fail because of duplicated columns in dataframe From acb11e634275922a5237f922cc54d9479d28f3d5 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:31:04 +0100 Subject: [PATCH 08/13] catch yaml.dump errors in derivation and merge to avoid failing a working merge or derivation --- CHANGELOG.md | 4 ++++ ChildProject/annotations.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8a7f38f..2510fd17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ All notable changes to this project will be documented in this file. 
- adding std (standard deviation) on the durations of vocalizations in metrics +### Modified + +- Derivation and merge of sets won't fail if the writing of metannots fails, it will simply issue an error in log + ## [0.4.5] 2025-12-15 ### Fixed diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py index be17b5da..d2839b4b 100644 --- a/ChildProject/annotations.py +++ b/ChildProject/annotations.py @@ -1369,7 +1369,10 @@ def _derive_annotations(self, subset=["set", "recording_filename", "range_onset", "range_offset"], keep='last') # write the derived set metadata only if some lines were correctly imported if imported.shape[0]: - self._write_set_metadata(output_set, set_metadata, output_as_path) + try: + self._write_set_metadata(output_set, set_metadata, output_as_path) + except Exception as e: + logger.error(f"Could not write set metadata for {output_set}") if output_as_path: # At this point the outputs are where they need to be, but the below functions will not run @@ -1863,7 +1866,10 @@ def merge_sets( self.write() # if the set's metadata exists already, do not write new metadata if not (self.project.path / ANNOTATIONS / output_set / METANNOTS).exists(): - self._write_set_metadata(output_set, new_set_meta) + try: + self._write_set_metadata(output_set, new_set_meta) + except Exception as e: + logger.error(f"Could not write set metadata for {output_set}") self._read_sets_metadata() return self From 8a9d878a58da3e3115d3bc2fa141a1a74c3b9f4a Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:38:03 +0100 Subject: [PATCH 09/13] some validation formats changes --- CHANGELOG.md | 3 +++ ChildProject/annotations.py | 2 +- ChildProject/projects.py | 2 +- ChildProject/tables.py | 3 ++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2510fd17..bdbfd218 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ All notable changes to this project will be documented in this file. 
### Modified - Derivation and merge of sets won't fail if the writing of metannots fails, it will simply issue an error in log +- dialect element in languages column inside children.csv is not supported anymore, dialect should be indicated elsewhere +- NA is accepted in datetime elements without warning +- custom is accepted as an annotation format ## [0.4.5] 2025-12-15 diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py index d2839b4b..aac52a66 100644 --- a/ChildProject/annotations.py +++ b/ChildProject/annotations.py @@ -75,7 +75,7 @@ class AnnotationManager: IndexColumn( name="format", description="input annotation format", - choices=[*converters.keys(), "NA"], + choices=[*converters.keys(), "NA", "custom"], required=False, ), IndexColumn( diff --git a/ChildProject/projects.py b/ChildProject/projects.py index a7280f3e..b72c0483 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -91,7 +91,7 @@ class ChildProject: ), IndexColumn( name="language", - description='language the child is exposed to if child is monolingual; small caps, indicate dialect by name or location if available; eg "france french"; "paris french"', + description='main language the child is exposed to; small caps; eg "french"; "english"', ), IndexColumn( name="languages", diff --git a/ChildProject/tables.py b/ChildProject/tables.py index 307dc761..667ba83f 100644 --- a/ChildProject/tables.py +++ b/ChildProject/tables.py @@ -231,7 +231,8 @@ def validate(self) -> Tuple[List[str], List[str]]: if column_attr.required and str(row[column_name]) != "NA": errors.append(self.msg(message)) elif column_attr.required or str(row[column_name]) != "NA": - warnings.append(self.msg(message)) + pass + #warnings.append(self.msg(message)) elif column_attr.regex: if not re.fullmatch(column_attr.regex, str(row[column_name])): message = "'{}' does not match the format required for '{}' on line {}, expected '{}'".format( From 672a73682d2b0b2eb9cc6702db0e540051897dab Mon Sep 17 
00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:48:12 +0100 Subject: [PATCH 10/13] allow different format changes --- CHANGELOG.md | 3 ++- ChildProject/projects.py | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdbfd218..5d76aef7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ All notable changes to this project will be documented in this file. - Derivation and merge of sets won't fail if the writing of metannots fails, it will simply issue an error in log - dialect element in languages column inside children.csv is not supported anymore, dialect should be indicated elsewhere - NA is accepted in datetime elements without warning -- custom is accepted as an annotation format +- custom is accepted as an annotation format, monoling, normative, child_sex, start_time_accuracy and dob_accuracy +- allow innacurate and reported for dob_criterion / accuracy to reflect lack of knowledge of the participant's age ## [0.4.5] 2025-12-15 diff --git a/ChildProject/projects.py b/ChildProject/projects.py index b72c0483..daa35e17 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -87,7 +87,7 @@ class ChildProject: IndexColumn( name="child_sex", description="f= female, m=male", - choices=["m", "M", "f", "F"], + choices=["m", "M", "f", "F", 'NA'], ), IndexColumn( name="language", @@ -106,7 +106,7 @@ class ChildProject: IndexColumn( name="monoling", description="whether the child is monolingual (Y) or not (N)", - choices=["Y", "N"], + choices=["Y", "N", 'NA'], ), IndexColumn( name="monoling_criterion", @@ -115,7 +115,7 @@ class ChildProject: IndexColumn( name="normative", description="whether the child is normative (Y) or not (N)", - choices=["Y", "N"], + choices=["Y", "N", 'NA'], ), IndexColumn( name="normative_criterion", @@ -144,13 +144,13 @@ class ChildProject: IndexColumn( name="dob_criterion", description="determines whether the date of birth is known exactly or 
extrapolated e.g. from the age. Dates of birth are assumed to be known exactly if this column is NA or unspecified.", - choices=["extrapolated", "exact"], + choices=["extrapolated", "exact", 'reported', 'innacurate'], required=False, ), IndexColumn( name="dob_accuracy", description="date of birth accuracy", - choices=["day", "week", "month", "year", "other"], + choices=["day", "week", "month", "year", "other", "innacurate", 'NA'], # innacurate shows the dob isn't representative of the child's age; analysis should not use the age of the participant ), IndexColumn( name="discard", @@ -243,7 +243,7 @@ class ChildProject: IndexColumn( name="start_time_accuracy", description="Accuracy of start_time for this recording. If not specified, assumes second-accuray.", - choices=["second", "minute", "hour", "reliable"], + choices=["second", "minute", "hour", "reliable", 'NA'], ), IndexColumn( name="noisy_setting", From d771b2a68edb77e492cf30fa9f3756398cd5168c Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Thu, 29 Jan 2026 18:55:24 +0100 Subject: [PATCH 11/13] json dumps set to list --- ChildProject/cmdline.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ChildProject/cmdline.py b/ChildProject/cmdline.py index 654b6bcc..066a42cf 100755 --- a/ChildProject/cmdline.py +++ b/ChildProject/cmdline.py @@ -28,6 +28,11 @@ import random import logging import json +class SetEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, set): + return list(obj) + return json.JSONEncoder.default(self, obj) # add this to setup,py in the requires section and in requirements.txt import colorlog @@ -693,7 +698,7 @@ def overview(args) -> int: logger.info(output) if args.format == 'json': - logger.info(json.dumps(dict)) + logger.info(json.dumps(dict, cls=SetEncoder)) return 0 From 6e2dd5eb0c3a2406f85cdc2ebbd1da31f77bb8b2 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Mon, 2 Feb 2026 12:00:21 +0100 Subject: [PATCH 12/13] do not allow pandas >3.0 yet --- 
ChildProject/projects.py | 1 - pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ChildProject/projects.py b/ChildProject/projects.py index daa35e17..7cc0cb8a 100644 --- a/ChildProject/projects.py +++ b/ChildProject/projects.py @@ -450,7 +450,6 @@ def read(self, verbose=False, accumulate=True) -> Self: verbose, ) - # breakpoint() if self.ignore_discarded and "discard" in self.ct.df: self.ct.df['discard'] = pd.to_numeric(self.ct.df["discard"], errors='coerce').fillna(0).astype('Int64').astype('string') self.discarded_children = self.ct.df[self.ct.df["discard"] == '1'] diff --git a/pyproject.toml b/pyproject.toml index 7198a957..95429735 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "matplotlib", "nltk", "numpy>=1.17", - "pandas>=2.0.0,<=3.0.0", + "pandas>=2.0.0,<3.0.0", "panoptes_client", "praat-parselmouth", "pyannote.core", From fa9e80516bb6b2197b0a1db559263cee70321682 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Mon, 2 Feb 2026 12:20:48 +0100 Subject: [PATCH 13/13] v0.4.6 --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d76aef7..3fd7cdca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +## [0.4.6] 2026-02-02 + ### Added - adding std (standard deviation) on the durations of vocalizations in metrics diff --git a/pyproject.toml b/pyproject.toml index 95429735..1752f4b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ChildProject" -version = '0.4.5' +version = '0.4.6' dependencies = [ "colorlog", "GitPython",