forked from mnyrop/annotate
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathRakefile
More file actions
182 lines (147 loc) Β· 7.1 KB
/
Copy pathRakefile
File metadata and controls
182 lines (147 loc) Β· 7.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
require 'fileutils'
require 'json'
require 'csv'
require 'slugify'
require 'sanitize'
require 'net/http'
require 'byebug'
# Running `rake` with no task name stores any pending annotations.
task default: [:store_annotations]

# Files freshly saved annotation data into per-canvas folders.
#
# For every directory under iiif/, each annotations/<manifest>/<canvas>.json
# file is parsed, handed to make_anno_list and store_anno_array together with
# the folder annotations/<manifest>/<canvas>, and then deleted. The manifest
# copy is refreshed when anything was stored or when it does not exist yet,
# and clippings are rebuilt for every manifest.
task :store_annotations do
  # Directory names are used verbatim as manifest names. No suffix is given to
  # File.basename here: passing ".*" would cut a name at its last dot
  # ("book.v2" -> "book") and send every later path to the wrong folder.
  manifests = Dir['iiif/*/'].map { |path| File.basename(path) }

  # create annotations folder for every manifest in iiif folder
  manifests.each { |manifest| mkdir_p "annotations/#{manifest}" }

  manifests.each do |manifest|
    puts "Manifest: #{manifest}"
    unstored_canvases = Dir["annotations/#{manifest}/*.json"].sort
    unstored_canvases.each do |canvas|
      name = File.basename(canvas, '.*') # file name without the .json extension
      dir = "annotations/#{manifest}/#{name}"
      sum_annotations = JSON.parse(File.read(canvas))
      FileUtils.mkdir_p dir # make dir for canvas annotations
      make_anno_list(dir, name, manifest) # write canvas annotation list to file
      store_anno_array(dir, name, manifest, sum_annotations) # write array of canvas annotations to file
      File.delete(canvas) # remove unstored data file
    end

    # TODO: also update if manifest is older than clean-manifest
    if !unstored_canvases.empty? || !File.exist?("iiif/#{manifest}/manifest.json")
      puts "Updating #{manifest}"
      update_manifest_copy(manifest)
    end
    make_clippings(manifest)
  end
end
# Writes the Jekyll-templated annotation list for one canvas, unless a
# list.json is already present in dir.
#
# dir      - canvas annotation directory that receives list.json
# name     - canvas name, used in the canvas label and in the list's @id
# manifest - manifest name, prefixed to the canvas name
#
# Returns nil when list.json already exists.
def make_anno_list(dir, name, manifest)
  list_file = "#{dir}/list.json"
  return if File.exist?(list_file) # an existing list is never overwritten

  puts "creating #{list_file}.\n"

  # manifest + '/' + name is the canvas label, to ensure uniqueness across all manifests
  front_matter = "---\nlayout: null\ncanvas: '#{manifest}/#{name}'\n---\n"
  liquid_setup = '{% assign anno_name = page.canvas | append: "-resources" %}' \
                 '{% assign annotations = site.pages | where: "label", anno_name | first %}'
  list_id = "{{ site.url }}{{ site.baseurl }}/annotations/#{manifest}/#{name}/list.json"
  list_json = '{"@context": "http://iiif.io/api/presentation/2/context.json","@id": "' + list_id +
              '","@type": "sc:AnnotationList","resources": {{ annotations.content }} }'

  File.write(list_file, front_matter + liquid_setup + list_json)
end
# Persists a canvas's annotations as a Jekyll page, merging them with any
# annotations stored earlier.
#
# dir             - canvas annotation directory
# name            - canvas name; the page is written to dir/name.json
# manifest        - manifest name, used in the page's label
# sum_annotations - Array of newly parsed annotations; it is not modified
#
# When dir/name.json already exists, its front matter is stripped, its JSON
# array is parsed, and those stored annotations are placed after the new ones.
def store_anno_array(dir, name, manifest, sum_annotations)
  annopath = "#{dir}/#{name}.json"
  merged = sum_annotations

  if File.exist?(annopath) # preexisting annotation file
    puts "appending new annotations to #{annopath}.\n"
    stored = JSON.parse(File.read(annopath).sub(/\A---(.|\n)*?---/, ''))
    # Array#+ builds a fresh array. The previous concat call grew the
    # caller's array in place as a hidden side effect.
    merged = sum_annotations + stored
  else
    puts "creating #{annopath}.\n"
  end

  front_matter = "---\nlayout: null\nlabel: #{manifest}/#{name}-resources\n---\n"
  File.write(annopath, front_matter + merged.to_json)
end
# Regenerates iiif/<manifest>/manifest.json from clean-manifest.json, pointing
# every canvas that has a folder under annotations/<manifest>/ at its
# annotation list.
#
# manifest - name of the manifest directory under iiif/ and annotations/
def update_manifest_copy(manifest)
  # The canvas number is the run of digits between "$" and "/canvas" in the @id.
  canvas_number = ->(canvas) { canvas["@id"].gsub(/.+\$([0-9]+)\/canvas.*/, '\1') }

  stored_canvases = Dir["annotations/#{manifest}/*/"].map { |canvas_dir| File.basename(canvas_dir, ".*") }
  puts "adding annotation references for canvases #{manifest}/#{stored_canvases} to manifest copy."

  clean_source = File.read("iiif/#{manifest}/clean-manifest.json")
  manifest_json = JSON.parse(clean_source.gsub(/\A---(.|\n)*?---/, "").to_s)

  manifest_json["sequences"][0]["canvases"].each do |canvas|
    number = canvas_number.call(canvas)
    next unless stored_canvases.include?(number)

    canvas["otherContent"] = [
      {
        "@id" => "{{ site.url }}{{ site.baseurl }}/annotations/#{manifest}/#{number}/list.json",
        "@type" => "sc:AnnotationList"
      }
    ]
  end

  # embed jekyll url and baseurl in manifest id, so that it will match the uri passed to Mirador
  # this is necessary to make the preserveManifestOrder config option take effect
  manifest_json['@id'].gsub!('https://iiif.archivelab.org', '{{ site.url }}{{ site.baseurl }}')

  File.write("iiif/#{manifest}/manifest.json", "---\nlayout: null\n---\n" + JSON.pretty_generate(manifest_json))
end
# Builds clipping images and a clippings.csv for every annotation in a manifest.
#
# Reads iiif/<manifest>/manifest.json and, for each canvas whose first
# otherContent entry is an sc:AnnotationList, reads the generated list from
# _site/. Every annotation whose target matches the canvas yields one CSV row
# and one image at clippings/<manifest>/<canvas number>/<label>.jpg.
# Annotations that target a different canvas are reported and skipped.
#
# manifest - name of the manifest directory under iiif/
def make_clippings(manifest)
  manifest_json = JSON.parse(File.read("iiif/#{manifest}/manifest.json").sub(/\A---(.|\n)*?---/, ''))
  annotated_canvases = manifest_json['sequences'][0]['canvases']
                       .select { |canvas| canvas['otherContent'] }
                       .select { |canvas| canvas['otherContent'][0]['@type'] == 'sc:AnnotationList' }

  clippings = []
  annotated_canvases.each do |canvas|
    canvas_id = canvas['@id']
    listpath = canvas['otherContent'][0]['@id'].gsub('{{ site.url }}{{ site.baseurl }}/', '')
    list_json = JSON.parse(File.read("_site/#{listpath}")) # TODO: remove dependence on generated _site

    list_json['resources'].each do |resource|
      canvas_on = resource['on'][0]['full']
      unless canvas_id == canvas_on
        # `next "message"` only sets the block's value, so the mismatch used
        # to be dropped without a trace; report it before skipping.
        warn "Skipping annotation #{resource['@id']}: canvas ID #{canvas_on} doesn't match #{canvas_id}"
        next
      end

      row = build_clipping_row(manifest, canvas, resource)
      FileUtils.mkdir_p "clippings/#{manifest}/#{row[:canvasNum]}"
      fetch_clipping_image(row[:clippingURL], row[:clippingImage])
      clippings << row
    end
  end

  write_clippings_csv(manifest, clippings)
end

# Collects the CSV fields for one annotation: the tags and text the user
# entered, the selected xywh region, and the image URL and local path.
def build_clipping_row(manifest, canvas, resource)
  canvas_id = canvas['@id']
  bodies = resource['resource']
  tags = bodies.select { |body| body['@type'] == 'oa:Tag' }.map { |tag| tag['chars'] }
  # strip html markup
  texts = bodies.select { |body| body['@type'] == 'dctypes:Text' }
                .map { |text| Sanitize.clean(text['chars']).strip }
  xywh = resource['on'][0]['selector']['default']['value'].gsub('xywh=', '')
  canvas_num = canvas_id.gsub(/.+\$([0-9]+)\/canvas.*/, '\1')

  # label ends up like 1-photo-woman-with-film-camera-1235-134-1126-637;
  # each text contributes at most 180 characters to it
  label = [canvas_num, *tags, *texts.map { |text| text[0, 180] }, xywh].join(' ').slugify
  image_root = canvas['images'][0]['resource']['service']['@id']

  # Key order is the CSV column order.
  {
    id: resource['@id'],
    item: manifest,
    canvas: canvas_id,
    canvasNum: canvas_num,
    tags: tags.join('|'),
    texts: texts.join('|'),
    xywh: xywh,
    clippingURL: "#{image_root}/#{xywh}/full/0/default.jpg",
    clippingImage: "clippings/#{manifest}/#{canvas_num}/#{label}.jpg"
  }
end

# Downloads url to path unless path already exists. Only a successful (2xx)
# response is saved, and it is written in binary mode.
def fetch_clipping_image(url, path)
  if File.exist?(path)
    puts "Not fetching #{path}"
    return
  end

  response = Net::HTTP.get_response(URI.parse(url))
  unless response.is_a?(Net::HTTPSuccess)
    # Leaving the file absent lets the next run try again, instead of
    # caching an error page under a .jpg name.
    warn "Could not fetch #{url}: HTTP #{response.code}; #{path} not written"
    return
  end

  File.binwrite(path, response.body)
  puts "Fetched #{path} from #{url}"
end

# Writes clippings/<manifest>/clippings.csv, with a header row taken from the
# first row's keys. Nothing is written when there are no clippings.
def write_clippings_csv(manifest, clippings)
  return if clippings.empty?

  csv_text = CSV.generate do |csv|
    csv << clippings.first.keys
    clippings.each { |row| csv << row.values }
  end
  File.write("clippings/#{manifest}/clippings.csv", csv_text)
end