Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions sheet_to_triples/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""Context specific RDF behaviours for Visual Meaning graphs."""

import functools
import json
import operator
import re

Expand Down Expand Up @@ -55,8 +56,14 @@ def from_identifier(self, value):
value = _norm(value)
# if ends with language tag, create a Literal with the appropriate lang
if re.search(self.lang_match, value):
inner = value[1:-4] if value[0] == '"' else value[:-3]
return rdflib.Literal(inner, lang=value[-2:])
lang = value[-2:]
value_without_lang = value[:-3]
if value_without_lang.startswith('"') and value_without_lang.endswith('"'):
inner = json.loads(value_without_lang)
else:
# {"k": "v"}@en compatibility
inner = value_without_lang
return rdflib.Literal(inner, lang=lang)
return rdflib.Literal(value)


Expand Down Expand Up @@ -138,9 +145,12 @@ def _maybe_from_literal(maybe_literal):
them in the json serialisation for both strings and arrays or objects.
"""
if getattr(maybe_literal, 'language', None):
# For JSON arrays/objects, str() gives the JSON representation
if maybe_literal[:1] + maybe_literal[-1:] in ('[]', '{}'):
return str(maybe_literal) + '@' + maybe_literal.language
return maybe_literal.n3()
# For regular strings, use json.dumps to properly escape
# This ensures roundtrip compatibility with from_identifier
return json.dumps(str(maybe_literal)) + '@' + maybe_literal.language
return str(maybe_literal)


Expand Down
39 changes: 37 additions & 2 deletions sheet_to_triples/tests/test_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""Unittests for the rdf.py module of sheet-to-triples."""

import io
import json
import unittest
from unittest import mock

Expand Down Expand Up @@ -158,15 +159,20 @@ def test_update_model_terms_language_tags(self):
# Not testing rdflib.Literal(1) as should maybe change behaviour?
('_s', '_p', rdflib.Literal('o')),
('_s', '_p', rdflib.Literal('o', lang='en')),
('_s', '_p', rdflib.Literal(
'thing: "quotes"\nand newlines\tand tabs', lang='en')),
('_s', '_p', rdflib.Literal('\u043e', lang='bg')),
('_s', '_p', rdflib.Literal('л', lang='bg')),
('_s', '_p', rdflib.Literal('', lang='en')),
('_s', '_p', rdflib.Literal('["o1", "o2"]', lang='en')),
('_s', '_p', rdflib.Literal('{"o": "v"}', lang='en')),
]
rdf.update_model_terms(model, triples)
expected = [
'o:_1', 'o', '"o"@en', '"\u043e"@bg', '""@en', '["o1", "o2"]@en',
'{"o": "v"}@en',
'o:_1', 'o', '"o"@en',
'"thing: \\"quotes\\"\\nand newlines\\tand tabs"@en',
'"\\u043e"@bg', '"\\u043b"@bg', '""@en',
'["o1", "o2"]@en', '{"o": "v"}@en',
]
self.assertEqual([t['obj'] for t in model['terms']], expected)

Expand Down Expand Up @@ -422,3 +428,32 @@ def test_from_identifier_lang_obj(self):
self.resolver.from_identifier('{"k": "v"}@en'),
rdflib.Literal('{"k": "v"}', lang='en')
Copy link
Contributor Author

@kisoso kisoso Feb 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this actually resolve to rdflib.Literal({"k": "v"}, lang='en')?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likely not — RDF literals don't really have a dict type.

)

def test_from_identifier_lang_roundtrip(self):
"""
Test the roundtrip workflow:
string -> from_identifier -> Literal -> update_model_terms -> string.
"""
test_texts = [
('simple text', 'en'),
('text\nwith\nnewlines', 'en'),
('text with "quotes"', 'fr'),
('text\twith\ttabs', 'de'),
('path\\with\\backslashes', 'es'),
('mixed: "quotes"\nand newlines\tand tabs', 'it'),
('', 'en'),
('о', 'bg'), # cyrillic o
('л', 'bg'),
]

for text, lang in test_texts:
with self.subTest(text=text, lang=lang):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, you're using subTest() here.

initial_string = json.dumps(text) + '@' + lang
literal = self.resolver.from_identifier(initial_string)

model = {'terms': []}
triples = [('_s', '_p', literal)]
rdf.update_model_terms(model, triples)

resulting_string = model['terms'][0]['obj']
self.assertEqual(initial_string, resulting_string)