diff --git a/api.yaml b/api.yaml
index 08d5d56..72a6642 100644
--- a/api.yaml
+++ b/api.yaml
@@ -140,13 +140,10 @@ paths:
The meta data for the new corpus can be provided in either JSON or XML
format. The JSON structure is a straightforward object providing
corpus name, title and (optionally) a repository URL. The XML format
- needs to be a TEI document with `teiCorpus` as its root element. The
+ needs to be a TEI document with `dracorCorpus` as its root element. The
corpus title needs to be provided in the `titleStmt` while the name
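- and repo URL are encoded in particular `idno` elements in the
- `publicationStmt` (see example).
+ is encoded in an `idno` element and the repo URL in the `target` attribute
+ of a `ref` element (`type="repo"`) in the `publicationStmt` (see example).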
-
- NB: Contrary to the TEI schema our teiCorpus document must not contain
- the `TEI` elements for individual plays.
content:
application/json:
schema:
@@ -183,7 +180,7 @@ paths:
type: string
example: |
-
+
@@ -191,12 +188,12 @@ paths:
DraCor
- test
- https://github.com/dracor-org/testdracor
+ test
+
-
+
responses:
'200':
description: Returns corpus metadata
diff --git a/jobs/process-webhook-delivery.xq b/jobs/process-webhook-delivery.xq
index a16db7f..781a7e9 100644
--- a/jobs/process-webhook-delivery.xq
+++ b/jobs/process-webhook-delivery.xq
@@ -150,8 +150,10 @@ declare function local:process-delivery () {
/delivery[@id = $local:delivery and not(@processed)]
let $repo := $delivery/@repo/string()
let $after := $delivery/@after/string()
- let $corpus := collection($config:corpora-root)//tei:teiCorpus[
- tei:teiHeader//tei:publicationStmt/tei:idno[@type="repo" and . = $repo]
+ (: DEPRECATED: remove teiCorpus support in v2 :)
+ let $corpus := collection($config:corpora-root)/(tei:dracorCorpus|tei:teiCorpus)[
+ tei:teiHeader//tei:publicationStmt/tei:ref[@type="repo" and @target = $repo]
+ or tei:teiHeader//tei:publicationStmt/tei:idno[@type="repo" and . = $repo]
]
let $info := dutil:get-corpus-info($corpus)
diff --git a/modules/api.xqm b/modules/api.xqm
index 845286f..c5501d5 100644
--- a/modules/api.xqm
+++ b/modules/api.xqm
@@ -238,7 +238,8 @@ declare
%output:method("json")
function api:corpora($include) {
array {
- for $corpus in collection($config:corpora-root)//tei:teiCorpus
+ (: DEPRECATED: remove teiCorpus support in v2 :)
+ for $corpus in collection($config:corpora-root)/(tei:dracorCorpus|tei:teiCorpus)
let $info := dutil:get-corpus-info($corpus)
let $name := $info?name
order by $name
@@ -256,11 +257,8 @@ function api:corpora($include) {
(:~
: Add new corpus
:
- : @param $data corpus.xml containing teiCorpus element.
- : @result XML document
- :
- : FIXME: create utility function that can be used both here and in
- : api:corpora-post-json() below.
+ : @param $data corpus.xml containing dracorCorpus element.
+ : @result JSON
:)
declare
%rest:POST("{$data}")
@@ -269,69 +267,40 @@ declare
%rest:consumes("application/xml", "text/xml")
%rest:produces("application/json")
%output:method("json")
-function api:corpora-post-tei($data, $auth) {
+function api:corpora-post-tei($data as document-node(), $auth) {
if (not($auth)) then
(
-
-
- ,
- map {
- "message": "authorization required"
- }
+ ,
+ map { "message": "authorization required" }
)
- else
-
- let $header := if ($data) then $data//tei:teiCorpus/tei:teiHeader else ()
- let $name := $header//tei:publicationStmt/tei:idno[
- @type = "URI" and @xml:base = "https://dracor.org/"
- ]/text()
-
- let $title := $header//tei:titleStmt/tei:title[1]/text()
-
- return if (not($header)) then
+ else try {
+ dutil:create-corpus-from-xml($data/*)
+ } catch dutil:invalid-corpus-document {
(
-
-
- ,
- map {
- "error": "invalid document, expecting "
- }
+ ,
+ map { "error": "Invalid corpus document. " || $err:description }
)
- else if (not($name) or not($title)) then
+ } catch dutil:invalid-corpus-name {
(
-
-
- ,
- map {
- "error": "missing name or title"
- }
+ ,
+ map { "error": $err:description }
)
- else if (not(matches($name, '^[-a-z0-1]+$'))) then
+ } catch dutil:corpus-exists {
(
-
-
- ,
- map {
- "error": "invalid name",
- "message": "Only lower case ASCII letters and digits are accepted."
- }
+ ,
+ map { "error": $err:description }
)
- else
- let $corpus := dutil:get-corpus($name)
- return if ($corpus) then (
-
-
- ,
- map {
- "error": "corpus already exists"
- }
- ) else (
- dutil:create-corpus($name, $data/tei:teiCorpus),
+ } catch * {
+ (
+ ,
map {
- "name": $name,
- "title": $title
+ "error": $err:description,
+ "module": $err:module,
+ "line": $err:line-number,
+ "code": $err:code
}
)
+ }
};
(:~
@@ -339,9 +308,6 @@ function api:corpora-post-tei($data, $auth) {
:
: @param $data JSON object describing corpus meta data
: @result JSON object
- :
- : FIXME: create utility function that can be used both here and in
- : api:corpora-post-tei() above.
:)
declare
%rest:POST("{$data}")
@@ -351,43 +317,30 @@ declare
%output:media-type("application/json")
%output:method("json")
function api:corpora-post-json($data) {
- let $json := parse-json(util:base64-decode($data))
- let $name := $json?name
- let $description := $json?description
- let $corpus := dutil:get-corpus($name)
-
- return if ($corpus) then
+ if (not($auth)) then (: FIXME(review): $auth is not among this function's parameters (only $data is declared) — it has to be added to the signature and bound by a RESTXQ annotation; the annotations are not fully visible here, confirm :)
(
-
-
- ,
- map {
- "error": "corpus already exists"
- }
+ ,
+ map { "message": "authorization required" }
)
- else if (not($name) or not($json?title)) then
+ else try {
+ let $json := parse-json(util:base64-decode($data))
+ return dutil:create-corpus($json)
+ } catch dutil:invalid-corpus-name {
(
-
-
- ,
- map {
- "error": "missing name or title"
- }
+ ,
+ map { "error": $err:description }
)
- else if (not(matches($name, '^[-a-z0-1]+$'))) then
+ } catch dutil:corpus-exists {
(
-
-
- ,
- map {
- "error": "invalid name",
- "message": "Only lower case ASCII letters and digits are accepted."
- }
+ ,
+ map { "error": $err:description }
)
- else (
- dutil:create-corpus($json),
- $json
- )
+ } catch * { (: NOTE(review): dutil:invalid-corpus-document, raised by dutil:create-corpus when name or title is missing, has no catch clause of its own here and is handled by this branch :)
+ (
+ ,
+ map { "error": $err:description }
+ )
+ }
};
(:~
diff --git a/modules/dts.xqm b/modules/dts.xqm
index 0ac9de6..c80e274 100644
--- a/modules/dts.xqm
+++ b/modules/dts.xqm
@@ -219,7 +219,8 @@ as item()+ {
"Paging is not possible on a single resource. Try without parameter 'page'!"
)
- else if ($corpus/name() eq "teiCorpus") then
+ (: DEPRECATED: remove teiCorpus support in v2 :)
+ else if ($corpus/name() = ("dracorCorpus", "teiCorpus")) then
if ( $nav eq 'parents') then
local:corpus-to-collection-with-parent-as-member($id)
else
@@ -238,7 +239,8 @@ as item()+ {
let $corpusname := local:uri-to-id($id)
let $corpus := dutil:get-corpus($corpusname)
return
- if ($corpus/name() eq "teiCorpus") then
+ (: DEPRECATED: remove teiCorpus support in v2 :)
+ if ($corpus/name() = ("dracorCorpus", "teiCorpus")) then
if ( $page ) then
(: paging is currently not supported :)
(: test: http://localhost:8088/api/v1/dts/collection?id=http://localhost:8088/id/rus&page=1 :)
@@ -368,10 +370,13 @@ as item()+ {
:)
declare function local:root-collection()
as map() {
- (: Get the corpora, get info needed for the member-array :)
- let $corpora := collection($config:corpora-root)//tei:teiCorpus
- (: get all the ids – these has to evaluate the teiCorpus files, unfortunately :)
- let $corpus-ids := $corpora//tei:idno[@type eq "URI"][@xml:base eq "https://dracor.org/"]/string()
+ (: Get the corpora, get info needed for the member-array :)
+ (: DEPRECATED: remove teiCorpus support in v2 :)
+ let $corpora := collection($config:corpora-root)//(tei:dracorCorpus|tei:teiCorpus)
+ (: get all the ids – unfortunately, this has to evaluate the corpus files (dracorCorpus or legacy teiCorpus) :)
+ let $corpus-ids := $corpora//tei:publicationStmt/tei:idno[
+ not(@type) or (@type eq "URI" and @xml:base eq "https://dracor.org/")
+ ]/string()
let $members := array {
for $corpus-id in $corpus-ids
return local:collection-member-by-id($corpus-id)
diff --git a/modules/load.xqm b/modules/load.xqm
index e2214d7..b343c08 100644
--- a/modules/load.xqm
+++ b/modules/load.xqm
@@ -72,11 +72,13 @@ declare function local:record-corpus-sha($name) {
(:~
: Load corpus from ZIP archive
:
- : @param $corpus The element providing corpus name and archive URL
+ : @param $corpus The element providing corpus name and archive URL
: @return List of created collections and files
+ :
+ : NB: until we remove support for it in v2 $corpus can also be a
+ : element
:)
-declare function load:load-corpus($corpus as element(tei:teiCorpus))
-as xs:string* {
+declare function load:load-corpus($corpus as element()) as xs:string* {
let $info := dutil:get-corpus-info($corpus)
let $name := $info?name
diff --git a/modules/metrics.xqm b/modules/metrics.xqm
index 4e92a5a..bbacaad 100644
--- a/modules/metrics.xqm
+++ b/modules/metrics.xqm
@@ -77,7 +77,8 @@ declare function metrics:collect-sitelinks($corpus as xs:string) {
: sitelinks collection
:)
declare function metrics:collect-sitelinks() {
- for $corpus in collection($config:corpora-root)//tei:teiCorpus
- let $info := dutil:get-corpus-info($corpus)
- return metrics:collect-sitelinks($info?name)
+  (: DEPRECATED: remove teiCorpus support in v2 :)
+  (: call the single-corpus variant with the name of every corpus found :)
+  collection($config:corpora-root)/(tei:dracorCorpus|tei:teiCorpus)
+    ! metrics:collect-sitelinks(dutil:get-corpus-info(.)?name)
};
diff --git a/modules/util.xqm b/modules/util.xqm
index cde4f4f..1571eec 100644
--- a/modules/util.xqm
+++ b/modules/util.xqm
@@ -473,19 +473,20 @@ declare function dutil:count-sitelinks(
};
(:~
- : Get teiCorpus element for corpus identified by $corpusname.
+ : Get the corpus root element (dracorCorpus, or a legacy teiCorpus) whose publicationStmt idno equals $corpusname.
:
: @param $corpusname
- : @return teiCorpus element
+ : @return matching dracorCorpus (or legacy teiCorpus) elements, empty if none
:)
declare function dutil:get-corpus(
$corpusname as xs:string
) as element()* {
- collection($config:corpora-root)//tei:teiCorpus[
+ (: DEPRECATED: remove teiCorpus support in v2 :)
+ collection($config:corpora-root)/(tei:dracorCorpus|tei:teiCorpus)[
tei:teiHeader//tei:publicationStmt/tei:idno[
- @type="URI" and
- @xml:base="https://dracor.org/" and
- . = $corpusname
+ (not(@type) or
+ (@type="URI" and @xml:base="https://dracor.org/")
+ ) and . = $corpusname
]
]
};
@@ -513,16 +514,20 @@ declare function local:markdown($input as element()) as item()* {
: @param $corpusname
: @return map
:)
-declare function dutil:get-corpus-info(
- $corpus as element(tei:teiCorpus)*
-) as map(*)* {
- let $header := $corpus/tei:teiHeader
+declare function dutil:get-corpus-info($corpus as element()*) as map(*)* {
+ (: IMPORTANT: do not assign any text() nodes to variables emitted with the
+ returned map. These will vanish when the corpus.xml is deleted and thus
+ break load:load-corpus(). (the "recreating null" problem)
+ :)
+ let $header := $corpus[1]/tei:teiHeader
let $name := $header//tei:publicationStmt/tei:idno[
- @type="URI" and @xml:base="https://dracor.org/"
+ not(@type) or (@type="URI" and @xml:base="https://dracor.org/")
][1]/string()
let $title := $header/tei:fileDesc/tei:titleStmt/tei:title[1]/string()
let $acronym := $header/tei:fileDesc/tei:titleStmt/tei:title[@type="acronym"][1]/string()
- let $repo := $header//tei:publicationStmt/tei:idno[@type="repo"][1]/string()
+ let $repo := $header//tei:publicationStmt/(
+ tei:idno[@type="repo"][1]|tei:ref[@type="repo"]/@target
+ )/string()
let $projectDesc := $header/tei:encodingDesc/tei:projectDesc
let $licence := $header//tei:publicationStmt/tei:availability/tei:licence
let $description := if ($projectDesc) then (
@@ -530,7 +535,7 @@ declare function dutil:get-corpus-info(
return string-join($paras, "
")
) else ()
let $git-file := $config:corpora-root || "/" || $name || "/git.xml"
- let $sha := doc($git-file)/git/sha/text()
+ let $sha := doc($git-file)/git/sha/string()
return if ($header) then (
map:merge((
@@ -1376,61 +1381,143 @@ declare function dutil:id-to-url (
else ()
};
+(: Logs the corpus document, then stores it as "corpus.xml" in the collection
+   that xmldb:create-collection returns for $name below the corpora root. :)
+declare function local:create-corpus(
+  $name as xs:string,
+  $xml as element(tei:dracorCorpus)
+) {
+  util:log-system-out("creating corpus"),
+  util:log-system-out($xml),
+  let $target-collection :=
+    xmldb:create-collection($config:corpora-root, $name)
+  return xmldb:store($target-collection, "corpus.xml", $xml)
+};
+
(:~
- : Create new corpus collection
+ : Create new corpus collection from JSON
:
: @param $corpus Map with corpus description
:)
declare function dutil:create-corpus($corpus as map()) {
- let $xml :=
-
-
-
-
- {$corpus?title}
-
-
- {$corpus?name}
- {
- if ($corpus?repository)
- then {$corpus?repository}
- else ()
- }
-
-
- {if ($corpus?description) then (
-
-
+ let $name := $corpus?name
+
+ return if (not($name) or not($corpus?title)) then
+ error (
+ xs:QName('dutil:invalid-corpus-document'),
+ "Missing corpus name or title"
+ )
+ else if (not(matches($name, '^[a-z]+$'))) then
+ error (
+ xs:QName('dutil:invalid-corpus-name'),
+ "Invalid name '" || $name
+ || "'. Only lower case ASCII letters are accepted"
+ )
+ else (
+ let $exists := dutil:get-corpus($name)
+ return if ($exists) then
+ error (
+ xs:QName('dutil:corpus-exists'),
+ "Corpus with name '" || $name || "' already exists"
+ )
+ else
+ let $xml :=
+
+
+
+
+ {$corpus?title}
+
+
+ {$corpus?name}
{
- for $p in tokenize($corpus?description, "
")
- return {$p}
+ if ($corpus?repository)
+ then
+ else ()
}
-
-
- ) else ()}
-
-
-
- return dutil:create-corpus($corpus?name, $xml)
+
+
+ {if ($corpus?description) then (
+
+
+ {
+ for $p in tokenize($corpus?description, "
")
+ return {$p}
+ }
+
+
+ ) else ()}
+
+
+ return (
+ local:create-corpus($name, $xml),
+ $corpus
+ )
+ )
};
(:~
- : Create new corpus collection
+ : Create new corpus collection from dracorCorpus document
:
- : @param $name Corpus name
- : @param $xml Corpus description
+ : This function does not rely on the corpus document to be valid against the
+ : DraCor schema. In fact, it works with legacy teiCorpus documents from which
+ : it only extracts the teiHeader element and wraps it into dracorCorpus.
+ :
+ : @param $xml dracorCorpus document
:)
-declare function dutil:create-corpus(
- $name as xs:string,
- $xml as element(tei:teiCorpus)
-) {
- util:log-system-out("creating corpus"),
- util:log-system-out($xml),
- xmldb:store(
- xmldb:create-collection($config:corpora-root, $name),
- "corpus.xml",
- $xml
+declare function dutil:create-corpus-from-xml($xml as element()) {
+  (: Only the teiHeader is kept and re-wrapped into a new dracorCorpus. :)
+  let $corpus := element {QName('http://www.tei-c.org/ns/1.0', 'dracorCorpus')} {
+    $xml/tei:teiHeader
+  }
+
+  let $header := $corpus/tei:teiHeader[1]
+  (: A document might (erroneously) have multiple idnos from which we extract
+     the distinct values. Below we will throw an error if there is more than
+     one unique name. :)
+  let $name := distinct-values(
+    $header//tei:publicationStmt/tei:idno[
+      not(@type)
+      or (: DEPRECATED: drop support for idno[@type="URI"] in v2 :)
+      @type = "URI" and @xml:base = "https://dracor.org/"
+    ]
+  )
+  (: Take the string value of the first title in document order; text() would
+     lose text nested in child elements and may yield several nodes. :)
+  let $title := ($header//tei:titleStmt/tei:title)[1]/string()
+
+  return if (not($header)) then
+    error (
+      xs:QName('dutil:invalid-corpus-document'),
+      "Missing teiHeader element"
+    )
+  else if (count($name) > 1) then
+    error (
+      xs:QName('dutil:invalid-corpus-document'),
+      "Multiple corpus names found"
+    )
+  else if (not($name) or not($title)) then
+    error (
+      xs:QName('dutil:invalid-corpus-document'),
+      "Missing corpus name or title"
+    )
+  else if (not(matches($name, '^[a-z]+$'))) then
+    error (
+      xs:QName('dutil:invalid-corpus-name'),
+      "Invalid name '" || $name
+      || "'. Only lower case ASCII letters are accepted"
+    )
+  else
+    let $exists := dutil:get-corpus($name)
+    return if ($exists) then (
+      error (
+        xs:QName('dutil:corpus-exists'),
+        "Corpus with name '" || $name || "' already exists"
+      )
+    ) else (
+      local:create-corpus($name, $corpus),
+      map { "name": $name, "title": $title }
+    )
+};
(:~
diff --git a/modules/webhook.xqm b/modules/webhook.xqm
index 5aac742..d9e1729 100644
--- a/modules/webhook.xqm
+++ b/modules/webhook.xqm
@@ -29,8 +29,11 @@ declare function local:check-signature (
};
declare function local:get-corpus ($repo-url as xs:string) as element()? {
- collection($config:corpora-root)//tei:teiCorpus[
- tei:teiHeader//tei:publicationStmt/tei:idno[@type="repo" and . = $repo-url]
+  (: Find the corpus whose publicationStmt names $repo-url as its repository.
+     DEPRECATED: remove teiCorpus support in v2 :)
+  collection($config:corpora-root)/(tei:dracorCorpus|tei:teiCorpus)[
+    tei:teiHeader//tei:publicationStmt/tei:ref[@type="repo"]/@target = $repo-url
+    or tei:teiHeader//tei:publicationStmt/tei:idno[@type="repo"] = $repo-url
]
};