Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build+test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: ascmhl-build-test

on:
push:
branches: [ master, dev/windowsPathHandling ]
branches: [ master, ignore_pattern_in_nested_histories ]
pull_request:
branches: [ master ]

Expand Down
64 changes: 61 additions & 3 deletions ascmhl/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import platform

import click
import pathspec
from lxml import etree

from . import logger
Expand All @@ -35,6 +36,8 @@
from typing import Dict
from collections import namedtuple

from .utils import check_path_is_absolute_to_history


@click.command()
@click.argument("root_path", type=click.Path(exists=True))
Expand Down Expand Up @@ -231,6 +234,9 @@ def create_for_folder_subcommand(
# start a verification session on the existing history
session = MHLGenerationCreationSession(existing_history, ignore_spec)

# update the ignore spec and include ignores from nested histories
ignore_spec = get_ignore_spec_including_nested_ignores(existing_history, ignore_list, ignore_spec_file)

num_failed_verifications = 0
# store the directory hashes of sub folders so we can use it when calculating the hash of the parent folder
# the mapping lookups will follow the dictionary format of [string: [hash_format: hash_value]] where string
Expand All @@ -239,7 +245,7 @@ def create_for_folder_subcommand(
dir_structure_hash_mapping_lookup = {}
hash_format_list = sorted(hash_formats)

for folder_path, children in post_order_lexicographic(root_path, session.ignore_spec.get_path_spec()):
for folder_path, children in post_order_lexicographic(root_path, ignore_spec.get_path_spec()):
# generate directory hashes
dir_hash_context_lookup = {}

Expand Down Expand Up @@ -695,7 +701,7 @@ def verify_directory_hash_subcommand(

existing_history = MHLHistory.load_from_path(root_path)

ignore_spec = ignore.MHLIgnoreSpec(existing_history.latest_ignore_patterns(), ignore_list, ignore_spec_file)
ignore_spec = get_ignore_spec_including_nested_ignores(existing_history, ignore_list, ignore_spec_file)

# FIXME: Update once argument signature has been modified to supply a list of formats
hash_formats = []
Expand Down Expand Up @@ -1031,7 +1037,7 @@ def diff_entire_folder_against_full_history_subcommand(root_path, verbose, ignor
num_failed_verifications = 0
num_new_files = 0

ignore_spec = ignore.MHLIgnoreSpec(existing_history.latest_ignore_patterns(), ignore_list, ignore_spec_file)
ignore_spec = get_ignore_spec_including_nested_ignores(existing_history, ignore_list, ignore_spec_file)

for folder_path, children in post_order_lexicographic(root_path, ignore_spec.get_path_spec()):
for item_name, is_dir in children:
Expand Down Expand Up @@ -1572,3 +1578,55 @@ def seal_file_path(existing_history, file_path, hash_formats: [str], session) ->
hash_result_lookup[hash_format] = SealPathResult(current_hash_lookup[hash_format], success)

return hash_result_lookup


def get_ignore_spec_including_nested_ignores(existing_history, ignore_list, ignore_spec_file=None):
    """Build the ignore spec for this session, including the ignores of nested histories.

    Ignored files from nested histories are also ignored in this session, but are not stored
    in the root ascmhl manifest.

    Patterns are collected in this order: the default ignore list, the latest patterns of the
    root history, the latest patterns reported for nested histories, the patterns in
    ``ignore_list`` and finally the patterns read from ``ignore_spec_file`` (only those not
    collected already).

    :param existing_history: history object providing ``latest_ignore_patterns()``,
        ``latest_ignore_pattern_from_nested_histories()`` and ``get_root_path()``
    :param ignore_list: iterable of additional patterns; ``None`` is treated as empty
    :param ignore_spec_file: optional path of a text file with one pattern per line
    :return: an ``ignore.MHLIgnoreSpec`` created from the root history's latest patterns and
        the collected, normalized patterns
    """
    # evaluate the loop invariants once instead of on every iteration
    default_patterns = ignore.default_ignore_list()
    root_patterns = existing_history.latest_ignore_patterns()

    # work on a copy so the list handed out by default_ignore_list() is never modified
    ignore_patterns_cumulated = list(default_patterns)

    # handle non-existent ignores in root history
    if root_patterns is not None:
        ignore_patterns_cumulated.extend(p for p in root_patterns if p not in default_patterns)

    ignore_patterns_cumulated.extend(existing_history.latest_ignore_pattern_from_nested_histories())

    # tolerate callers that pass None instead of an empty list
    ignore_patterns_cumulated.extend(ignore_list or ())

    if ignore_spec_file:
        with open(ignore_spec_file, "r") as fh:
            patterns_from_file = [line.rstrip("\n") for line in fh if line != "\n"]
        for pattern in patterns_from_file:
            if pattern not in ignore_patterns_cumulated:
                ignore_patterns_cumulated.append(pattern)

    # we now build the absolute ignore paths for the current session from all nested ignores
    # otherwise the post_order_lexicographic() won't ignore these paths
    root_path = existing_history.get_root_path()
    absolute_ignore_paths = []
    for pattern in ignore_patterns_cumulated:
        if pattern in default_patterns or "/" not in pattern:
            # default patterns and plain names are used as they are
            absolute_ignore_paths.append(pattern)
        elif pattern.startswith("/"):
            # pattern is anchored at the history root
            absolute_ignore_paths.append(root_path + pattern)
        elif pattern.startswith("**"):
            absolute_ignore_paths.append(pattern)
        elif pattern.endswith("/") and "/" not in pattern[:-1]:
            # single name with trailing slash, e.g. "dir/"
            absolute_ignore_paths.append(pattern)
        elif pattern.endswith("/**") and "/" not in pattern[:-3]:
            # single name followed by "/**", e.g. "dir/**"
            absolute_ignore_paths.append(pattern)
        else:
            absolute_ignore_paths.append(root_path + os.sep + pattern)

    normalized_paths = [pathspec.util.normalize_file(p) for p in absolute_ignore_paths]
    return ignore.MHLIgnoreSpec(root_patterns, normalized_paths)
196 changes: 192 additions & 4 deletions ascmhl/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
__email__ = "opensource@pomfort.com"
"""

import os
from collections import defaultdict
from pathlib import PureWindowsPath
from typing import Dict, List

from . import chain_xml_parser
from . import chain_xml_parser, ignore
from . import logger
from .ignore import MHLIgnoreSpec
from .hashlist import MHLHashList, MHLHashEntry, MHLCreatorInfo, MHLProcessInfo
Expand Down Expand Up @@ -296,9 +298,7 @@ def commit(self, creator_info: MHLCreatorInfo, process_info: MHLProcessInfo):
new_hash_list.process_info.root_media_hash = process_info.root_media_hash
new_hash_list.process_info.hashlist_custom_basename = process_info.hashlist_custom_basename
new_hash_list.process_info.process = process_info.process
new_hash_list.process_info.ignore_spec = MHLIgnoreSpec(
history.latest_ignore_patterns(), self.ignore_spec.get_pattern_list()
)
new_hash_list.process_info.ignore_spec = self.get_relevant_ignore_pattern(history)

history.write_new_generation(new_hash_list)
relative_generation_path = self.root_history.get_relative_file_path(new_hash_list.file_path)
Expand All @@ -307,3 +307,191 @@ def commit(self, creator_info: MHLCreatorInfo, process_info: MHLProcessInfo):
referenced_hash_lists[history.parent_history].append(new_hash_list)

chain_xml_parser.write_chain(history.chain, new_hash_list)

def get_relevant_ignore_pattern(self, history) -> MHLIgnoreSpec:
"""
Only store the relevant ignore patterns for an ascmhl-history and ignore others.
This will split the pattern into the relevant bits for the lowest ascmhl-history relative to it

The result starts with the latest ignore patterns of ``history`` (or the default ignore list
if the history has none) and is extended with those patterns of this session's ignore spec
that are selected by the rules below. Session patterns already contained in the result are
skipped.

:param history: the history whose new generation gets the ignore spec; the method reads its
    latest ignore patterns, root path, parent history and child histories
:return: an MHLIgnoreSpec created from the collected patterns and the history's latest
    ignore patterns (which may be None)
"""

# get the ignore pattern from the latest history, if there is none, use the default pattern
final_ignores = []
latest_ignore_patterns = history.latest_ignore_patterns()
if latest_ignore_patterns is None:
final_ignores += ignore.default_ignore_list()
else:
final_ignores += latest_ignore_patterns

# the patterns of the ignore spec used by the current session
ignore_patterns = self.ignore_spec.get_pattern_list()

history_path = history.get_root_path()
# get the highest parent history to build the correct relative paths for this generation
if history.parent_history is not None:
# nested history: walk up until the history without a parent is reached
parent_history = history.parent_history
while parent_history.parent_history is not None:
parent_history = parent_history.parent_history

parent_history_path = parent_history.get_root_path()
# path of this history relative to the topmost history
parent_rel_path = os.path.relpath(history_path, parent_history_path)
for pattern in ignore_patterns:
if not pattern in final_ignores:
# patterns without a slash and patterns starting with "/**/" are taken over unchanged
if "/" not in pattern:
final_ignores.append(pattern)
elif pattern.startswith("/**/"):
final_ignores.append(pattern)
elif belongs_to_child(pattern, history, parent_history_path):
# if child is ignored itself, we need to append the ignore pattern to the next parent
for child in history.walk_child_histories(history):
child_root: str
# compare with forward slashes on Windows, since the patterns use "/"
if os.name == "nt":
child_root = PureWindowsPath(child.get_root_path()).as_posix()
else:
child_root = child.get_root_path()
if child_root.endswith(pattern):
pattern = extract_ignore_pattern(pattern, history_path)
final_ignores.append(pattern)
continue
elif belongs_to_parent_or_neighbour(pattern, parent_rel_path):
# not relevant for this history, so it is not stored here
continue
else:
# rewrite the pattern relative to this history before storing it
pattern = extract_ignore_pattern(pattern, parent_rel_path)
final_ignores.append(pattern)
else:
continue
else:
# history without a parent: patterns are evaluated against its own root path
for pattern in ignore_patterns:
if not pattern in final_ignores:
if (
not belongs_to_child(pattern, history, history_path)
and not pattern in ignore.default_ignore_list()
):
# patterns starting with "/" and patterns without a slash are stored unchanged,
# other patterns containing a slash are rewritten by extract_ignore_pattern()
if pattern.startswith("/"):
final_ignores.append(pattern)
elif pattern.find("/") != -1:
final_ignores.append(extract_ignore_pattern(pattern))
else:
final_ignores.append(pattern)
else:
# NOTE(review): presumably this handles a pattern that ignores a child history itself,
# analogous to the branch for nested histories above - confirm
for child in history.walk_child_histories(history):
child_root: str
if os.name == "nt":
child_root = PureWindowsPath(child.get_root_path()).as_posix()
else:
child_root = child.get_root_path()
if child_root.endswith(pattern):
if history == child.parent_history:
pattern = extract_ignore_pattern(pattern)
final_ignores.append(pattern)
# NOTE(review): latest_ignore_patterns may be None here - confirm MHLIgnoreSpec accepts that
return MHLIgnoreSpec(final_ignores, latest_ignore_patterns)


def belongs_to_child(pattern, history, parent_history_path, ignore_child=None) -> bool:
    """Return True if ``pattern`` points at or into the root folder of a child history.

    :param pattern: ignore pattern using "/" as separator; one leading "/" is ignored
    :param history: object whose ``child_histories`` are checked
    :param parent_history_path: path the child root paths are made relative to before they
        are compared with ``pattern``
    :param ignore_child: optional child history that is excluded from the check
    :return: True if the pattern equals the relative path of a child history or starts with
        that path followed by "/", otherwise False
    """
    if pattern.startswith("/"):
        pattern = pattern[1:]

    for child in history.child_histories:
        if ignore_child == child:
            continue
        child_rel_path = os.path.relpath(child.get_root_path(), parent_history_path)
        if os.name == "nt":
            # patterns always use forward slashes
            child_rel_path = child_rel_path.replace("\\", "/")
        # compare whole path components: a raw prefix test would wrongly treat
        # "foobar/file" as belonging to a child history located at "foo"
        if pattern == child_rel_path or pattern.startswith(child_rel_path + "/"):
            return True

    return False


def belongs_to_parent_or_neighbour(pattern, parent_rel_path) -> bool:
    """Compare ``pattern`` segment by segment with ``parent_rel_path``.

    ``pattern`` is split at "/", ``parent_rel_path`` at ``os.sep``. The result is:

    * False for patterns without "/", for a single name with trailing "/" (e.g. ``dir/``)
      and for patterns starting with ``**/``
    * True for a single name with leading "/" (e.g. ``/file``)
    * True if all pattern segments match the leading segments of ``parent_rel_path``
    * True at the first differing segment, unless that pattern segment is an inner ``**``
      (neither the first nor the last segment), in which case the result is False
    * False if ``parent_rel_path`` is used up before the pattern and no segment differed

    :param pattern: ignore pattern using "/" as separator
    :param parent_rel_path: relative path of a history using ``os.sep`` as separator
    """
    if "/" not in pattern:
        return False
    if pattern.startswith("/") and "/" not in pattern[1:]:
        return True
    if pattern.endswith("/") and "/" not in pattern[:-1]:
        return False
    if pattern.startswith("**/"):
        return False

    pattern_segments = pattern.strip("/").split("/")
    path_segments = parent_rel_path.strip(os.sep).split(os.sep)
    last_index = len(pattern_segments) - 1

    for index, (pattern_segment, path_segment) in enumerate(zip(pattern_segments, path_segments)):
        if pattern_segment != path_segment:
            is_inner_wildcard = 0 < index < last_index and pattern_segment == "**"
            return not is_inner_wildcard
        if index == last_index:
            # the complete pattern matched the beginning of the path
            return True

    return False


def extract_ignore_pattern(pattern: str, parent_rel_path=None) -> str:
    """Rewrite ``pattern`` so it can be stored relative to a history.

    A single name with a leading "/" (e.g. ``/file``) is returned unchanged. For every other
    pattern one leading "/" is dropped and the leading segments that are shared with
    ``parent_rel_path`` are removed (see ``_extract_pattern_relative_to_history``). Then:

    * patterns without an inner "/" (``name``, ``dir/``, ``dir/**``) are returned as they are
    * patterns starting with ``**/`` and ending with ``/**`` are returned as they are
    * other patterns starting with ``**/`` are returned with a "/" prepended
    * all remaining patterns are returned as the shortened pattern with a leading "/"
    * if nothing is left after removing the shared segments the pattern is returned as it is

    :param pattern: ignore pattern using "/" as separator
    :param parent_rel_path: optional path of the history (``os.sep`` separated); if omitted
        no segments are removed
    :return: the rewritten pattern
    """
    if pattern.startswith("/"):
        if "/" not in pattern[1:]:
            return pattern
        pattern = pattern[1:]

    pattern_rel_path = _extract_pattern_relative_to_history(pattern, parent_rel_path)
    if pattern_rel_path is None:
        # the complete pattern is covered by the history path, keep it untouched
        return pattern

    starts_recursive = pattern.startswith("**/")

    if pattern.endswith("/"):
        if "/" not in pattern[:-1]:
            return pattern
        return "/" + (pattern if starts_recursive else pattern_rel_path)

    if pattern.endswith("/**"):
        if starts_recursive or "/" not in pattern[:-3]:
            return pattern
        return pattern_rel_path if pattern.startswith("/") else "/" + pattern_rel_path

    if "/" not in pattern[:-1]:
        return pattern
    if starts_recursive:
        return "/" + pattern
    return pattern_rel_path if pattern_rel_path.startswith("/") else "/" + pattern_rel_path


def _extract_pattern_relative_to_history(pattern: str, history_path=None) -> str:
    """Remove the leading segments of ``pattern`` that are part of ``history_path``.

    The first segment of the pattern is searched in ``history_path``; starting there, all
    consecutive segments equal in both are dropped from the front of the pattern and the
    remaining segments are joined with "/".

    Patterns starting with ``**`` are returned unchanged, as is every pattern when
    ``history_path`` is None.

    NOTE: despite the ``str`` annotation the function returns None if nothing is left of the
    pattern.

    :param pattern: ignore pattern using "/" as separator
    :param history_path: path using ``os.sep`` as separator, or None
    """
    if pattern.startswith("**"):
        return pattern
    if history_path is None:
        return pattern

    pattern_parts = pattern.lstrip("/").split("/")
    history_path_parts = history_path.lstrip(os.sep).split(os.sep)

    # position in the history path where the pattern starts (end of the list if it never does)
    try:
        start = history_path_parts.index(pattern_parts[0])
    except ValueError:
        start = len(history_path_parts)

    # count the pattern segments that are identical to the history path from there on
    matched = 0
    for history_part, pattern_part in zip(history_path_parts[start:], pattern_parts):
        if history_part != pattern_part:
            break
        matched += 1

    remainder = "/".join(pattern_parts[matched:])
    if remainder != "":
        return remainder
    return None
21 changes: 20 additions & 1 deletion ascmhl/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import re
from datetime import datetime, date, time

from . import hasher
from . import hasher, ignore
from .__version__ import ascmhl_folder_name, ascmhl_file_extension, ascmhl_chainfile_name, ascmhl_collectionfile_name
from . import hashlist_xml_parser, chain_xml_parser
from .utils import datetime_now_filename_string
Expand Down Expand Up @@ -98,6 +98,25 @@ def latest_ignore_patterns(self) -> Optional[List[str]]:
return None
return hash_list.process_info.ignore_spec.get_pattern_list()

def latest_ignore_pattern_from_nested_histories(self) -> Optional[List[str]]:
    """Collect the latest ignore patterns of the nested histories, prefixed with their paths.

    For every history in ``self.child_history_mappings`` the latest ignore patterns are read
    and prefixed with the path of that history relative to this history's root path.
    Patterns of the default ignore list are left out, and histories without ignore patterns
    are skipped.

    :return: list of prefixed patterns; patterns without "/" are inserted as
        ``<relative path>/**/<pattern>``, all other patterns are appended to the relative path
    """
    parent_path = self.get_root_path()
    default_patterns = ignore.default_ignore_list()
    cumulated_ignores = []

    for history in self.child_history_mappings.values():
        patterns = history.latest_ignore_patterns()
        if patterns is None:
            # latest_ignore_patterns() returns None if there is nothing to read patterns from
            continue

        # the relative path only depends on the history, not on the single pattern
        relative_root = os.path.relpath(history.get_root_path(), parent_path)
        for pattern in patterns:
            # don't add the default pattern
            if pattern in default_patterns:
                continue
            # return the directory of the history with the pattern appended
            if "/" in pattern:
                # make sure path and pattern are separated, "A" + "dir/" must not become "Adir/"
                separator = "" if pattern.startswith("/") else "/"
                cumulated_ignores.append(relative_root + separator + pattern)
            else:
                cumulated_ignores.append(relative_root + "/**/" + pattern)

    return cumulated_ignores

# methods to query and compare hashes
def find_original_hash_entry_for_path(self, relative_path: str) -> Optional[MHLHashEntry]:
"""Searches the history for the first (original) hash of a file
Expand Down
8 changes: 5 additions & 3 deletions ascmhl/traverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ def post_order_lexicographic(top: str, ignore_pathspec: pathspec.PathSpec = None
children = []
for name in names:
file_path = os.path.join(top, name)
is_directory = isdir(file_path)
if is_directory:
file_path = file_path + "/"
if ignore_pathspec and ignore_pathspec.match_file(file_path):
if os.path.basename(os.path.normpath(file_path)) != ascmhl_folder_name:
logger.verbose(f"ignoring filepath {file_path}")
logger.verbose(f"ignoring filepath {file_path.rstrip('/')}")
continue
path = join(top, name)
children.append((name, isdir(path)))
children.append((name, is_directory))

# if directory, yield children recursively in post order until exhausted.
for name, is_dir in children:
Expand Down
7 changes: 7 additions & 0 deletions ascmhl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,10 @@ def convert_posix_to_local_path(path: str) -> str:
if os.name == "nt":
return str(PureWindowsPath(PurePosixPath(path)))
return path


def check_path_is_absolute_to_history(base_path, relative_path) -> bool:
    """Return True if ``relative_path`` is ``base_path`` itself or is located below it.

    Both paths are made absolute with ``os.path.abspath`` before they are compared, so
    relative arguments are resolved against the current working directory.

    :param base_path: path of the folder that should contain ``relative_path``
    :param relative_path: path to check
    """
    base_abs = os.path.abspath(base_path)
    relative_abs = os.path.abspath(relative_path)

    # Review note (Copilot AI, May 16, 2025): using 'startswith' to determine if one path is
    # within another leads to false positives (e.g. '/root/foo' vs '/root/foobar'), therefore
    # whole path components are compared via os.path.commonpath.
    try:
        return os.path.commonpath([base_abs, relative_abs]) == base_abs
    except ValueError:
        # commonpath() raises if the paths have nothing in common at all, e.g. different drives
        return False
Loading