Skip to content

Commit ea35175

Browse files
Do not linktrail if following text is not [a-z]?
See wiktextract issue #1604 (tatuylonen/wiktextract#1604) and https://en.wikipedia.org/wiki/Help:Wikitext#Blend_link. This should not be merged as is, because it will create problems in other extractors that might rely on different behavior. In the best-case scenario, there might be two different camps: 1) languages that use spaces, which want to do linktrailing, and 2) languages without spaces, which can't do linktrailing. If this is the case, we might be able to get away with a kludge that checks whether the script of the last character in the link matches the script of the first character after the link.
1 parent 9905b1f commit ea35175

2 files changed

Lines changed: 11 additions & 2 deletions

File tree

src/wikitextprocessor/parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ def text_fn(ctx: "Wtp", token: str) -> None:
10291029
and not node.children[-1].children
10301030
and not ctx.suppress_special
10311031
):
1032-
m = re.match(r"(?s)(\w+)(.*)", token)
1032+
m = re.match(r"(?s)([a-z]+)(.*)", token)
10331033
if m:
10341034
node.children[-1].children.append(m.group(1))
10351035
token = m.group(2)

tests/test_parser.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1101,7 +1101,7 @@ def test_link12(self):
11011101
self.assertEqual(link.kind, NodeKind.LINK)
11021102
self.assertEqual(link.largs, [["foo"], ["\n[bar"]])
11031103

1104-
def test_link_trailing(self):
1104+
def test_link_trailing_1(self):
11051105
tree = self.parse("test", "[[Help]]ing heal")
11061106
self.assertEqual(len(tree.children), 2)
11071107
a, b = tree.children
@@ -1110,6 +1110,15 @@ def test_link_trailing(self):
11101110
self.assertEqual(a.children, ["ing"])
11111111
self.assertEqual(b, " heal")
11121112

1113+
def test_link_trailing_not_latin(self):
1114+
tree = self.parse("test", "[[appellāre]]の直説法所相現在第 foo")
1115+
self.assertEqual(len(tree.children), 2)
1116+
a, b = tree.children
1117+
self.assertEqual(a.kind, NodeKind.LINK)
1118+
self.assertEqual(a.largs, [["appellāre"]])
1119+
self.assertEqual(a.children, [])
1120+
self.assertEqual(b, "の直説法所相現在第 foo")
1121+
11131122
def test_url1(self):
11141123
tree = self.parse("test", "this https://wikipedia.com link")
11151124
self.assertEqual(len(tree.children), 3)

0 commit comments

Comments
 (0)