Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions dagshub/data_engine/annotation/importer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from difflib import SequenceMatcher
from pathlib import Path, PurePosixPath
from pathlib import Path, PurePosixPath, PurePath
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List

Expand Down Expand Up @@ -138,7 +138,7 @@ def remap_annotations(
"""
if remap_func is None:
first_ann = list(annotations.keys())[0]
first_ann_filename = PurePosixPath(first_ann).name
first_ann_filename = Path(first_ann).name
queried = self.ds["path"].endswith(first_ann_filename).select("size").all()
dp_paths = [dp.path for dp in queried]
remap_func = self.guess_annotation_filename_remapping(first_ann, dp_paths)
Expand Down Expand Up @@ -185,12 +185,14 @@ def guess_annotation_filename_remapping(

@staticmethod
def generate_path_map_func(ann_path: str, dp_path: str) -> Callable[[str], Optional[str]]:
ann_path_posix = PurePosixPath(ann_path)
# Use an OS-dependent path for ann_path because it comes from the importer,
# which returns OS-dependent paths
ann_path_obj = PurePath(ann_path)
dp_path_posix = PurePosixPath(dp_path)

matcher = SequenceMatcher(
None,
ann_path_posix.parts,
ann_path_obj.parts,
dp_path_posix.parts,
)
diff = matcher.get_matching_blocks()
Expand All @@ -204,7 +206,7 @@ def generate_path_map_func(ann_path: str, dp_path: str) -> Callable[[str], Optio

match = diff[0]
# Make sure that the match extends to the end of both paths
if match.a + match.size != len(ann_path_posix.parts) or match.b + match.size != len(dp_path_posix.parts):
if match.a + match.size != len(ann_path_obj.parts) or match.b + match.size != len(dp_path_posix.parts):
raise CannotRemapPathError(ann_path, dp_path)
# At least ONE of the paths needs to be matched from its very start
if match.a != 0 and match.b != 0:
Expand All @@ -214,6 +216,7 @@ def generate_path_map_func(ann_path: str, dp_path: str) -> Callable[[str], Optio
if match.a == 0 and match.b == 0:

def identity_func(x: str) -> str:
# Do a replace because we might be going from a Windows path to a POSIX path
return x.replace(ann_path, dp_path)

return identity_func
Expand All @@ -227,17 +230,17 @@ def identity_func(x: str) -> str:
prefix = dp_path_posix.parts[match.a : match.b]

def add_prefix(x: str) -> Optional[str]:
return PurePosixPath(*prefix, x).as_posix()
return PurePath(*prefix, x).as_posix()

return add_prefix

else:
# Remove the prefix
def remove_prefix(x: str) -> Optional[str]:
p = PurePosixPath(x)
p = PurePath(x)
if len(p.parts) <= match.a:
return None
return PurePosixPath(*p.parts[match.a :]).as_posix()
return PurePath(*p.parts[match.a :]).as_posix()

return remove_prefix

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_version(rel_path: str) -> str:
"python-dateutil",
"boto3",
"semver",
"dagshub-annotation-converter>=0.1.5",
"dagshub-annotation-converter>=0.1.12",
]

extras_require = {
Expand Down