-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpull
More file actions
executable file
·78 lines (65 loc) · 2.51 KB
/
pull
File metadata and controls
executable file
·78 lines (65 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
import argparse
import json
import logging
import os
import sys
import yaml
from config import metadata, special_chars, numeric_parser, transcription_filter, postprocess
from lxml import etree
from tpen import TPen
from tpen2tei.parse import from_sc
def pull_ms(directory, projectlist):
    """Fetch the given projects and merge them into a single manifest.

    Each entry of *projectlist* is passed to ``directory.project(project=...)``
    and is afterwards expected to carry a JSON string under its ``'data'`` key
    (presumably filled in by that call -- confirm against the T-PEN client).
    The first project's decoded JSON becomes the result; the canvases of the
    first sequence of every later project are appended, in list order, to the
    canvases of the result's first sequence.

    :param directory: object exposing a ``project(project=...)`` method.
    :param projectlist: sequence of dict-like project records, already in the
        order their canvases should appear. It is not modified.
    :return: the merged manifest as a dict, or ``None`` if *projectlist* is
        empty.
    """
    if not projectlist:
        return None

    # Fetch every project before decoding anything, as the original did.
    for project in projectlist:
        directory.project(project=project)

    # Unpack instead of pop(0) so the caller's list is left intact.
    first, *rest = projectlist
    ms = json.loads(first.get('data'))
    canvases = ms.get('sequences')[0].get('canvases')
    for project in rest:
        manifest = json.loads(project.get('data'))
        canvases.extend(manifest.get('sequences')[0].get('canvases'))
    return ms
if __name__ == '__main__':
    # Command-line entry point: fetch every T-PEN project whose label contains
    # the given identifier, merge them into one manifest, convert it to XML
    # and write the serialised document to stdout.
    argp = argparse.ArgumentParser(description="Test the conversion of a particular MS to XML")
    argp.add_argument('identifier')
    argp.add_argument('--config', default='./tpen.yml')
    argp.add_argument('--members', default='./members.json')
    opts = argp.parse_args()

    # Parse the list of project members; the file is optional, and members
    # stays None when it does not exist.
    members = None
    if os.path.isfile(opts.members):
        with open(opts.members, encoding="utf-8") as f:
            members = json.load(f)

    # Get the config and initialise T-PEN. The encoding is given explicitly,
    # matching how the members file is read, so the result does not depend on
    # the platform's default locale.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
    # acceptable for a trusted local config file, but consider yaml.safe_load
    # if the config only ever holds plain data.
    with open(opts.config, encoding="utf-8") as y:
        config = yaml.load(y, Loader=yaml.Loader)

    # Log to the configured file if there is one, otherwise to stderr so that
    # log lines never mix with the XML written to stdout.
    logargs = {
        'format': '%(asctime)s %(message)s',
        'level': config.get('loglevel', 'INFO'),
    }
    if 'logfile' in config:
        logargs['filename'] = config.get('logfile')
    else:
        logargs['stream'] = sys.stderr
    logging.basicConfig(**logargs)
    tpenobj = TPen(cfg=config)

    # Pick out the relevant projects from the list, sort by label, and fetch them
    testinfo = sorted(
        (x for x in tpenobj.projects_list() if opts.identifier in x['label']),
        key=lambda e: e.get('label'),
    )

    # Merge the JSON into one object and XMLify it
    jsonspec = pull_ms(tpenobj, testinfo)
    if jsonspec is None:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # keeping the diagnostic out of the XML stream and signalling failure.
        # The bare exit() helper comes from the site module and is not
        # guaranteed to exist in every interpreter configuration.
        sys.exit("No projects found with %s in title. Check login credentials in tpen.yml?" % opts.identifier)
    xml = from_sc(jsonspec,
                  metadata=metadata,
                  special_chars=special_chars,
                  numeric_parser=numeric_parser,
                  text_filter=transcription_filter,
                  postprocess=postprocess,
                  members=members)

    # Emit the document only if the conversion produced one; bytes go straight
    # to the underlying buffer so the UTF-8 encoding is preserved as-is.
    if xml is not None:
        sys.stdout.buffer.write(etree.tostring(xml, encoding="utf-8", pretty_print=True, xml_declaration=True))