From 66ff7006132436e321ea8889ea79c637d305a29a Mon Sep 17 00:00:00 2001 From: D-VR <26770468+D-VR@users.noreply.github.com> Date: Tue, 30 Jun 2026 21:09:37 +0200 Subject: [PATCH 1/5] Fix name-clash collision dropping same-named files Files added with name_clash_id=None (direct external links, direct Moodle content files, and embedded videojs nodes) all hashed md5("None") when disambiguating same-named siblings, so two distinct same-named files resolved to one path and one was silently lost. Add Node._clash_suffix(), which falls back to the node's URL when name_clash_id is None. At the rename branch the siblings differ precisely because their URLs differ, so the URL is a present, distinct key. --- syncmymoodle/__main__.py | 34 ++++++++++++---------------- tests/test_course_prefix_handling.py | 29 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/syncmymoodle/__main__.py b/syncmymoodle/__main__.py index fe47223..001d6ff 100755 --- a/syncmymoodle/__main__.py +++ b/syncmymoodle/__main__.py @@ -195,6 +195,17 @@ def go_to_path(self, target_path): raise Exception("The path is not found in this root node. Wrong path?") return target_node[-1] + def _clash_suffix(self): + # Stable, distinct suffix used to disambiguate same-named siblings. + # Fall back to the URL when no name_clash_id is set (direct-link, + # embedded, and direct-content file nodes pass name_clash_id=None); + # otherwise such nodes would all hash to md5("None") and collide onto + # the same path, silently dropping all but one file. + key = self.name_clash_id if self.name_clash_id is not None else self.url + return base64.urlsafe_b64encode( + hashlib.md5(str(key).encode("utf-8")).hexdigest().encode("utf-8") + ).decode()[:10] + def remove_children_nameclashes(self): # Check for duplicate filenames @@ -248,30 +259,15 @@ def remove_children_nameclashes(self): ) ] if len(siblings) > 0: - # if a filename is still duplicate in its directory, we rename it by appending its id (urlsafe base64 so it also works for urls). + # if a filename is still duplicate in its directory, we rename + # it by appending a stable per-node key (works for ids and urls). filename = Path(child.name) child.name = ( - filename.stem - + "_" - + base64.urlsafe_b64encode( - hashlib.md5(str(child.name_clash_id).encode("utf-8")) - .hexdigest() - .encode("utf-8") - ).decode()[:10] - + filename.suffix + filename.stem + "_" + child._clash_suffix() + filename.suffix ) for s in siblings: filename = Path(s.name) - s.name = ( - filename.stem - + "_" - + base64.urlsafe_b64encode( - hashlib.md5(str(s.name_clash_id).encode("utf-8")) - .hexdigest() - .encode("utf-8") - ).decode()[:10] - + filename.suffix - ) + s.name = filename.stem + "_" + s._clash_suffix() + filename.suffix self.children.remove(s) unclashed_children.extend(siblings) diff --git a/tests/test_course_prefix_handling.py b/tests/test_course_prefix_handling.py index 2cd9b98..a01ebef 100644 --- a/tests/test_course_prefix_handling.py +++ b/tests/test_course_prefix_handling.py @@ -116,3 +116,32 @@ def test_same_name_with_different_urls_still_gets_stable_suffixes(): assert "Slides" not in names for name in names: assert name.startswith("Slides_") + + +def test_clashing_files_without_name_clash_id_use_url_for_distinct_names(): + # Direct-link / direct-content / embedded nodes pass name_clash_id=None. + # Two such same-named files with different URLs must still get distinct + # names (falling back to the URL) instead of both hashing md5("None"). + root = Node("", -1, "Root", None) + section = root.add_child("General", None, "Section") + section.add_child( + "slides.pdf", + None, + "Linked file [application/pdf]", + url="https://a.example/slides.pdf", + name_clash_id=None, + ) + section.add_child( + "slides.pdf", + None, + "Linked file [application/pdf]", + url="https://b.example/slides.pdf", + name_clash_id=None, + ) + + root.remove_children_nameclashes() + + names = [child.name for child in section.children] + assert len(names) == 2 + assert len(set(names)) == 2 + assert "slides.pdf" not in names From 810d1e8290c4fc27c7bcdc3bf9ddac501bb85fb1 Mon Sep 17 00:00:00 2001 From: D-VR <26770468+D-VR@users.noreply.github.com> Date: Tue, 30 Jun 2026 21:11:05 +0200 Subject: [PATCH 2/5] Match selected_courses/skip_courses by exact id, with selected overriding skip Course filtering matched configured URLs with `str(course_id) in url`, a substring test: selecting course 12 also pulled in courses 1 and 2, and skipping 12 silently dropped 1 and 2. Add _course_id_in_filter(), which compares the parsed `id` query parameter (or a bare numeric entry) exactly. Also make selected_courses a true allowlist that overrides skip_courses (and only_sync_semester), as documented; previously skip_courses was applied first and could not be overridden. Harden shortname/idnumber access so a missing or empty field no longer aborts the sync or creates an empty-named semester folder. --- syncmymoodle/__main__.py | 58 +++++++++++++++++++----------------- tests/test_sync_filtering.py | 47 +++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/syncmymoodle/__main__.py b/syncmymoodle/__main__.py index 001d6ff..ab5e2c2 100755 --- a/syncmymoodle/__main__.py +++ b/syncmymoodle/__main__.py @@ -645,6 +645,23 @@ def _as_list(self, value): return value return [value] + def _course_id_in_filter(self, course_id, entries) -> bool: + """Return True if ``course_id`` is referenced by a configured entry. + + Entries are course URLs (``.../course/view.php?id=NNN``). The ``id`` + query parameter is compared exactly, so e.g. ``id=12`` does not also + match courses ``1`` or ``2``. A bare numeric id entry is also accepted. + """ + course_id = str(course_id) + for entry in entries or []: + entry = str(entry) + parsed = urllib.parse.urlparse(entry) + if course_id in urllib.parse.parse_qs(parsed.query).get("id", []): + return True + if entry.strip() == course_id: + return True + return False + def _configured_patterns(self, *keys, course_id=None): patterns = [] for key in keys: @@ -1371,39 +1388,26 @@ def sync(self): # Syncing all courses for course in self.get_all_courses(): - course_name = self._format_course_name(course["shortname"]) + course_name = self._format_course_name( + course.get("shortname") or f"course-{course.get('id')}" + ) course_id = course["id"] - if ( - len( - [ - c - for c in self.config.get("skip_courses", []) - if str(course_id) in c - ] - ) - > 0 - ): - continue - - # Skip not selected courses - if ( - len(self.config.get("selected_courses", [])) > 0 - and len( - [ - c - for c in self.config.get("selected_courses", []) - if str(course["id"]) in c - ] - ) - == 0 + selected_courses = self.config.get("selected_courses", []) + if selected_courses: + # selected_courses is an explicit allowlist that overrides + # skip_courses (and, below, only_sync_semester). + if not self._course_id_in_filter(course_id, selected_courses): + continue + elif self._course_id_in_filter( + course_id, self.config.get("skip_courses", []) ): continue - semestername = course["idnumber"][:4] - # Skip not selected semesters + semestername = (course.get("idnumber") or "")[:4] or "unknown-semester" + # Skip not selected semesters (selected_courses overrides this) if ( - len(self.config.get("selected_courses", [])) == 0 + not selected_courses and self.config.get("only_sync_semester", []) and semestername not in self.config.get("only_sync_semester", []) ): diff --git a/tests/test_sync_filtering.py b/tests/test_sync_filtering.py index e9d7ee1..693456a 100644 --- a/tests/test_sync_filtering.py +++ b/tests/test_sync_filtering.py @@ -49,3 +49,50 @@ def test_skip_courses_and_semester_filter_limit_synced_courses(): "Semester | 26ss | | | ", "Course | 26ss/Current Semester | | | ", ] + + +# Course ids that are substrings of one another, to pin down exact-id matching. +SUBSTRING_COURSES = [ + {"id": 1, "shortname": "Course One", "idnumber": "26ss-1"}, + {"id": 2, "shortname": "Course Two", "idnumber": "26ss-2"}, + {"id": 12, "shortname": "Course Twelve", "idnumber": "26ss-12"}, + {"id": 123, "shortname": "Course OneTwoThree", "idnumber": "26ss-123"}, +] + + +def _run_filter(config): + synced = [] + syncer = make_syncer(config) + syncer.get_all_courses = lambda: SUBSTRING_COURSES # type: ignore[method-assign] + syncer.get_course = lambda course_id: synced.append(course_id) or [] # type: ignore[method-assign] + syncer.session = FakeSession() + syncer.sync() + return synced + + +def test_selected_courses_match_by_exact_id_not_substring(): + # Selecting course 12 must not also pull in courses 1 and 2. + synced = _run_filter( + {"selected_courses": ["https://moodle.rwth-aachen.de/course/view.php?id=12"]} + ) + assert synced == [12] + + +def test_skip_courses_match_by_exact_id_not_substring(): + # Skipping course 12 must not silently drop courses 1 and 2. + synced = _run_filter( + {"skip_courses": ["https://moodle.rwth-aachen.de/course/view.php?id=12"]} + ) + assert synced == [1, 2, 123] + + +def test_bare_numeric_id_entry_is_accepted(): + synced = _run_filter({"selected_courses": ["12"]}) + assert synced == [12] + + +def test_selected_courses_override_skip_courses(): + # A course present in both lists is synced: selected_courses wins. + url = "https://moodle.rwth-aachen.de/course/view.php?id=12" + synced = _run_filter({"selected_courses": [url], "skip_courses": [url]}) + assert synced == [12] From bf4fb2713e89205a0ca7a90872860a66dd1cc835 Mon Sep 17 00:00:00 2001 From: D-VR <26770468+D-VR@users.noreply.github.com> Date: Tue, 30 Jun 2026 21:11:56 +0200 Subject: [PATCH 3/5] Harden login flow against unexpected page structure The SSO/TOTP login and wstoken parsing assumed Moodle's exact HTML, so a changed or error page raised AttributeError/TypeError instead of the intended clean exit. Null-check the sesskey