Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 39 additions & 43 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,30 @@ tro.add_performance(
```

The `accessed_arrangement` and `modified_arrangement` parameters accept a single `str`,
a `list[str]`, or `None`.
a `(arrangement_id, mount_path)` tuple, a list of either, or `None`.

---

## `ArrangementRef` and mount paths
## Mount paths and `ArrangementBinding`

Each entry in `accessed_arrangements` / `contributed_to_arrangements` on a
`TrustedResearchPerformance` is an `ArrangementRef` — a small object that pairs an arrangement
`@id` with an optional `path` indicating the mount point (the directory that arrangement paths
are relative to).
The same arrangement can be mounted at different paths in different performances (e.g.
`/input` in one run and `/output` in another). To represent this unambiguously in RDF,
each reference is wrapped in an intermediate `trov:ArrangementBinding` object that ties the
arrangement, the mount path, and the performance together.

```json
"trov:accessedArrangement": [
{
"@id": "trp/0/binding/0",
"@type": "trov:ArrangementBinding",
"trov:arrangement": { "@id": "arrangement/0" },
"trov:boundTo": "/mnt/input"
}
]
```

Binding IDs are generated automatically when you call `add_performance`.
The `boundTo` field is optional — omit it when the path is not meaningful.

### CLI — `ARRANGEMENT_ID:MOUNT_PATH` syntax

Expand All @@ -160,59 +174,41 @@ tro-utils --declaration my.jsonld performance add \
-M arrangement/2:/mnt/output
```

Entries without a `:` are plain arrangement IDs (no mount path recorded). The two forms
Entries without a `:` are plain arrangement IDs (no mount path recorded). The two forms
can be mixed freely in a single command.

### Python API

Pass `ArrangementRef` objects directly to `add_performance`:
`add_performance` accepts each arrangement as either a plain `str` (no mount path)
or a `(arrangement_id, mount_path)` tuple:

```python
from tro_utils.models import ArrangementRef

tro.add_performance(
start_time=start,
end_time=end,
comment="Containerised run",
# Mix ArrangementRef objects and plain strings freely
accessed_arrangement=[
ArrangementRef("arrangement/0", path="/mnt/input"),
"arrangement/1", # path omitted
("arrangement/0", "/mnt/input"), # tuple: (id, boundTo path)
"arrangement/1", # plain string: no path
],
modified_arrangement=ArrangementRef("arrangement/2", path="/mnt/output"),
modified_arrangement=("arrangement/2", "/mnt/output"),
attrs=[TRPAttribute.NET_ISOLATION],
)
```

You can also construct `ArrangementRef` objects directly when building a model:
A single value or a list is accepted for both parameters.

```python
from tro_utils.models import ArrangementRef, TrustedResearchPerformance
The resolved `ArrangementBinding` objects are stored on
`TrustedResearchPerformance.accessed_arrangements` and
`TrustedResearchPerformance.contributed_to_arrangements`:

perf = TrustedResearchPerformance(
performance_id="trp/0",
accessed_arrangements=[
ArrangementRef(arrangement_id="arrangement/0", path="/mnt/data"),
ArrangementRef(arrangement_id="arrangement/1", path="/mnt/reference"),
],
contributed_to_arrangements=[
ArrangementRef(arrangement_id="arrangement/2"), # path is optional
],
)
```

In JSON-LD, a ref with a path serialises as:

```json
{
"@id": "arrangement/0",
"trov:mountPath": "/mnt/data"
}
```python
for binding in perf.accessed_arrangements:
print(binding.binding_id) # e.g. "trp/0/binding/0"
print(binding.arrangement_id) # e.g. "arrangement/0"
print(binding.path) # e.g. "/mnt/input" (or None)
```

A ref without a path serialises as `{ "@id": "arrangement/0" }`, preserving backwards
compatibility with existing TRO files that contain plain `@id`-only objects.

---

### `TRO` — high-level facade
Expand Down Expand Up @@ -247,8 +243,8 @@ tro.add_performance(
end_time=datetime(2024, 3, 2, 10, 0, 11),
comment="My workflow run",
attrs=["trov:InternetIsolation"],
accessed_arrangement="arrangement/0", # str, list[str], or None
modified_arrangement="arrangement/1", # str, list[str], or None
accessed_arrangement="arrangement/0", # str | (id, path) tuple | list | None
modified_arrangement="arrangement/1", # str | (id, path) tuple | list | None
)

# Save, sign, and timestamp
Expand Down Expand Up @@ -318,8 +314,8 @@ TransparentResearchObject tro.py
├── ArtifactArrangement[] arrangement.py
│ └── ArtifactLocation[] arrangement.py
├── TrustedResearchPerformance[] performance.py
│ ├── ArrangementRef[] performance.py (accessed_arrangements)
│ ├── ArrangementRef[] performance.py (contributed_to_arrangements)
│ ├── ArrangementBinding[] performance.py (accessed_arrangements)
│ ├── ArrangementBinding[] performance.py (contributed_to_arrangements)
│ └── PerformanceAttribute[] performance.py
└── TROAttribute[] attribute.py
```
Expand Down
26 changes: 16 additions & 10 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,13 +556,19 @@ def test_performance_add_plain_ids(
with open(tro_file) as f:
data = json.load(f)
perf = data["@graph"][0]["trov:hasPerformance"][0]
assert perf["trov:accessedArrangement"]["@id"] == "arrangement/0"
assert perf["trov:contributedToArrangement"]["@id"] == "arrangement/1"
assert (
perf["trov:accessedArrangement"]["trov:arrangement"]["@id"]
== "arrangement/0"
)
assert (
perf["trov:contributedToArrangement"]["trov:arrangement"]["@id"]
== "arrangement/1"
)

def test_performance_add_id_with_path(
self, runner, tmp_path, temp_workspace, trs_profile
):
"""ARRANGEMENT_ID:PATH syntax is parsed and serialised as trov:mountPath."""
"""ARRANGEMENT_ID:PATH syntax is parsed and serialised as trov:boundTo."""
tro_file = tmp_path / "test_tro.jsonld"
self._setup_tro_with_arrangements(
runner,
Expand Down Expand Up @@ -593,11 +599,11 @@ def test_performance_add_id_with_path(
data = json.load(f)
perf = data["@graph"][0]["trov:hasPerformance"][0]
accessed = perf["trov:accessedArrangement"]
assert accessed["@id"] == "arrangement/0"
assert accessed["trov:mountPath"] == "/mnt/input"
assert accessed["trov:arrangement"]["@id"] == "arrangement/0"
assert accessed["trov:boundTo"] == "/mnt/input"
contributed = perf["trov:contributedToArrangement"]
assert contributed["@id"] == "arrangement/1"
assert contributed["trov:mountPath"] == "/mnt/output"
assert contributed["trov:arrangement"]["@id"] == "arrangement/1"
assert contributed["trov:boundTo"] == "/mnt/output"

def test_performance_add_multiple_accessed_with_paths(
self, runner, tmp_path, temp_workspace, trs_profile
Expand Down Expand Up @@ -652,6 +658,6 @@ def test_performance_add_multiple_accessed_with_paths(
accessed = perf["trov:accessedArrangement"]
assert isinstance(accessed, list)
assert len(accessed) == 2
by_id = {r["@id"]: r for r in accessed}
assert by_id["arrangement/0"]["trov:mountPath"] == "/mnt/a"
assert "trov:mountPath" not in by_id["arrangement/1"]
by_id = {r["trov:arrangement"]["@id"]: r for r in accessed}
assert by_id["arrangement/0"]["trov:boundTo"] == "/mnt/a"
assert "trov:boundTo" not in by_id["arrangement/1"]
75 changes: 45 additions & 30 deletions tests/test_tro_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
ArtifactArrangement,
ArtifactComposition,
ArtifactLocation,
ArrangementRef,
CompositionFingerprint,
HashValue,
PerformanceAttribute,
Expand Down Expand Up @@ -461,8 +460,11 @@ def test_add_performance(self, temp_workspace, tmp_path, gpg_setup, trs_profile)
assert perf["rdfs:comment"] == "Data processing workflow"
assert perf["trov:startedAtTime"] == "2024-01-01T10:00:00"
assert perf["trov:endedAtTime"] == "2024-01-01T11:00:00"
assert perf["trov:accessedArrangement"]["@id"] == "arrangement/0"
assert perf["trov:contributedToArrangement"]["@id"] == "arrangement/1"
accessed = perf["trov:accessedArrangement"]
assert accessed["@type"] == "trov:ArrangementBinding"
assert accessed["trov:arrangement"]["@id"] == "arrangement/0"
contributed = perf["trov:contributedToArrangement"]
assert contributed["trov:arrangement"]["@id"] == "arrangement/1"
assert len(perf["trov:hasPerformanceAttribute"]) == 2

def test_add_performance_invalid_arrangement(
Expand Down Expand Up @@ -520,11 +522,14 @@ def test_add_performance_multiple_arrangements(
accessed = perf["trov:accessedArrangement"]
assert isinstance(accessed, list)
assert len(accessed) == 2
assert {r["@id"] for r in accessed} == {"arrangement/0", "arrangement/1"}
assert {r["trov:arrangement"]["@id"] for r in accessed} == {
"arrangement/0",
"arrangement/1",
}
# One contributed → serialised as a plain dict
contributed = perf["trov:contributedToArrangement"]
assert isinstance(contributed, dict)
assert contributed["@id"] == "arrangement/2"
assert contributed["trov:arrangement"]["@id"] == "arrangement/2"

def test_add_performance_multiple_arrangements_invalid(
self, tmp_path, gpg_setup, trs_profile
Expand All @@ -549,7 +554,7 @@ def test_add_performance_multiple_arrangements_invalid(
def test_add_performance_arrangement_ref_with_path(
self, temp_workspace, tmp_path, gpg_setup, trs_profile
):
"""ArrangementRef objects with path are accepted and serialise trov:mountPath."""
"""A (arrangement_id, path) tuple is accepted and serialises trov:boundTo."""
tro = create_tro_with_gpg(
filepath=str(tmp_path / "test_tro.jsonld"),
gpg_setup=gpg_setup,
Expand All @@ -563,24 +568,24 @@ def test_add_performance_arrangement_ref_with_path(
start_time=datetime.datetime(2024, 1, 1, 10, 0, 0),
end_time=datetime.datetime(2024, 1, 1, 11, 0, 0),
comment="with path",
accessed_arrangement=ArrangementRef("arrangement/0", path="/mnt/data"),
accessed_arrangement=("arrangement/0", "/mnt/data"),
modified_arrangement="arrangement/1",
attrs=[],
)

perf = tro.data["@graph"][0]["trov:hasPerformance"][0]
accessed = perf["trov:accessedArrangement"]
assert accessed["@id"] == "arrangement/0"
assert accessed["trov:mountPath"] == "/mnt/data"
assert accessed["trov:arrangement"]["@id"] == "arrangement/0"
assert accessed["trov:boundTo"] == "/mnt/data"
# contributed has no path
contributed = perf["trov:contributedToArrangement"]
assert contributed["@id"] == "arrangement/1"
assert "trov:mountPath" not in contributed
assert contributed["trov:arrangement"]["@id"] == "arrangement/1"
assert "trov:boundTo" not in contributed

def test_add_performance_mixed_strings_and_refs(
self, temp_workspace, tmp_path, gpg_setup, trs_profile
):
"""A mixed list of str and ArrangementRef is accepted; mountPaths serialised where set."""
"""A mixed list of str and (id, path) tuples is accepted; boundTo serialised where set."""
tro = create_tro_with_gpg(
filepath=str(tmp_path / "test_tro.jsonld"),
gpg_setup=gpg_setup,
Expand All @@ -597,23 +602,23 @@ def test_add_performance_mixed_strings_and_refs(
end_time=datetime.datetime(2024, 1, 1, 11, 0, 0),
comment="mixed",
accessed_arrangement=[
ArrangementRef("arrangement/0", path="/mnt/input"),
("arrangement/0", "/mnt/input"),
"arrangement/1",
],
modified_arrangement=ArrangementRef("arrangement/2", path="/mnt/output"),
modified_arrangement=("arrangement/2", "/mnt/output"),
attrs=[],
)

perf = tro.data["@graph"][0]["trov:hasPerformance"][0]
accessed = perf["trov:accessedArrangement"]
assert isinstance(accessed, list)
assert len(accessed) == 2
by_id = {r["@id"]: r for r in accessed}
assert by_id["arrangement/0"]["trov:mountPath"] == "/mnt/input"
assert "trov:mountPath" not in by_id["arrangement/1"]
by_id = {r["trov:arrangement"]["@id"]: r for r in accessed}
assert by_id["arrangement/0"]["trov:boundTo"] == "/mnt/input"
assert "trov:boundTo" not in by_id["arrangement/1"]
contributed = perf["trov:contributedToArrangement"]
assert contributed["@id"] == "arrangement/2"
assert contributed["trov:mountPath"] == "/mnt/output"
assert contributed["trov:arrangement"]["@id"] == "arrangement/2"
assert contributed["trov:boundTo"] == "/mnt/output"


class TestTROSigning:
Expand Down Expand Up @@ -1053,9 +1058,13 @@ def test_complete_data_processing_workflow(
performances = tro.data["@graph"][0]["trov:hasPerformance"]
assert len(performances) == 1
assert "threshold=150" in performances[0]["rdfs:comment"]
assert performances[0]["trov:accessedArrangement"]["@id"] == "arrangement/0"
assert (
performances[0]["trov:contributedToArrangement"]["@id"] == "arrangement/1"
performances[0]["trov:accessedArrangement"]["trov:arrangement"]["@id"]
== "arrangement/0"
)
assert (
performances[0]["trov:contributedToArrangement"]["trov:arrangement"]["@id"]
== "arrangement/1"
)

# Verify composition has unique artifacts
Expand Down Expand Up @@ -1493,18 +1502,18 @@ def test_verify_nested_directory_structure(self, tmp_path, gpg_setup):
assert len(extra) == 0

def test_get_arrangement_path_hash_map(self, temp_workspace, tmp_path, gpg_setup):
"""Test getting the path-to-hash mapping for an arrangement."""
"""Test getting the path-to-hash mapping for an arrangement."""
tro = create_tro_with_gpg(
filepath=str(tmp_path / "test_tro.jsonld"), gpg_setup=gpg_setup
)

# Add arrangement
tro.add_arrangement(str(temp_workspace), comment="Test", ignore_dirs=[])

# Get the mapping
# Get the path-to-hash mapping
path_hash_map = tro.get_arrangement_path_hash_map("arrangement/0")

# Verify mapping contains all files
# Verify mapping contains all files
assert len(path_hash_map) == 3
assert "input_data.csv" in path_hash_map
assert "notes.txt" in path_hash_map
Expand All @@ -1524,7 +1533,7 @@ def test_get_arrangement_path_hash_map_invalid_id(self, tmp_path, gpg_setup):
filepath=str(tmp_path / "test_tro.jsonld"), gpg_setup=gpg_setup
)

# Try to get mapping for non-existent arrangement
# Try to get mapping for non-existent arrangement
with pytest.raises(ValueError, match="not found"):
tro.get_arrangement_path_hash_map("arrangement/99")

Expand Down Expand Up @@ -1774,10 +1783,10 @@ def test_to_path_hash_map(self, tmp_path):

comp = ArtifactComposition()
arr = ArtifactArrangement.from_directory(d, comp, "arrangement/0")
mapping = arr.to_path_hash_map(comp)
binding = arr.to_path_hash_map(comp)

assert "file.txt" in mapping
assert mapping["file.txt"].startswith("sha256:")
assert "file.txt" in binding
assert binding["file.txt"].startswith("sha256:")

def test_to_from_jsonld_roundtrip(self):
arr = ArtifactArrangement(
Expand Down Expand Up @@ -2035,14 +2044,20 @@ class TestTrustedResearchPerformance:
"""Unit tests for TrustedResearchPerformance."""

def test_to_from_jsonld_roundtrip(self):
from tro_utils.models import ArrangementBinding

trp = TrustedResearchPerformance(
performance_id="trp/0",
comment="test run",
conducted_by_id="trs",
started_at=datetime.datetime(2024, 1, 1, 10, 0, 0),
ended_at=datetime.datetime(2024, 1, 1, 11, 0, 0),
accessed_arrangements=[ArrangementRef("arrangement/0", path="/workdir")],
contributed_to_arrangements=[ArrangementRef("arrangement/1")],
accessed_arrangements=[
ArrangementBinding("trp/0/binding/0", "arrangement/0", path="/workdir")
],
contributed_to_arrangements=[
ArrangementBinding("trp/0/binding/1", "arrangement/1")
],
attributes=[
PerformanceAttribute(
"trp/0/attribute/0", "trov:InternetIsolation", "trs/cap/0"
Expand Down
7 changes: 3 additions & 4 deletions tro_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@
from . import TRPAttribute
from .models.arrangement import ArtifactArrangement
from .models.composition import ArtifactComposition
from .models.performance import ArrangementRef
from .tro_utils import TRO

console = Console()


def _parse_arrangement_ref(value: str) -> ArrangementRef:
"""Parse ``ARRANGEMENT_ID`` or ``ARRANGEMENT_ID:MOUNT_PATH`` into an :class:`ArrangementRef`."""
def _parse_arrangement_ref(value: str) -> tuple[str, str | None]:
"""Parse ``ARRANGEMENT_ID`` or ``ARRANGEMENT_ID:MOUNT_PATH`` into a ``(id, path)`` tuple."""
arrangement_id, _, path = value.partition(":")
return ArrangementRef(arrangement_id=arrangement_id, path=path or None)
return (arrangement_id, path or None)


_TEMPLATES = {
Expand Down
Loading
Loading