-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmediawiki_standard_indexer.rb
More file actions
66 lines (59 loc) · 1.99 KB
/
mediawiki_standard_indexer.rb
File metadata and controls
66 lines (59 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
require 'require_all'
require_all '../dataspects/lib'
require 'yaml'
require 'awesome_print'
# Connection profiles keyed by a label (e.g. a host name); the rest of this
# file reads each profile through string keys such as 'url', 'user' and
# 'password'.
#
# The file location defaults to the original hard-coded path and may be
# overridden through the DATASPECTS_PROFILES environment variable, so the
# script is not tied to a single user's home directory.
$profiles = YAML.load_file(ENV.fetch('DATASPECTS_PROFILES', '/home/lex/profiles.yml'))
module Dataspects
  # Indexes the entities of one MediaWiki resource silo into the
  # Elasticsearch index named by STANDARD_INDEX.
  #
  # WARNING: constructing an instance is destructive. The index is dropped and
  # recreated (a development convenience) before anything is indexed.
  class MediaWikiStandardIndexer < Indexer
    # Index that is reset on construction and written to by #execute.
    STANDARD_INDEX = 'dataspectspublic'.freeze

    # Directory containing the standard settings/mapping JSON files. The path
    # is relative, so it is resolved against the process's working directory.
    INDEXING_CONFIG_DIR = '../dataspects/lib/dataspects/indexing'.freeze

    # Both keyword arguments are forwarded unchanged to Indexer#initialize by
    # the bare `super`; this class does not read them itself.
    # NOTE(review): @esc is presumably set up by Indexer#initialize — confirm.
    #
    # @param es_url [String] Elasticsearch endpoint
    # @param tika_url [String] Tika endpoint
    def initialize(es_url:, tika_url:)
      super
      # Reset index for development
      reset_standard_index
    end

    # Walks every resource in the "Entity" category of the
    # 'cookbook.findandlearn.net' MediaWiki and indexes each of the resource's
    # entities into STANDARD_INDEX, logging one line per indexed entity.
    #
    # @raise [KeyError] if no profile is registered under the MediaWiki label
    # @return whatever MediaWiki#resources_from_CATEGORY returns
    def execute
      # Specify a MediaWiki as the resource silo
      label = 'cookbook.findandlearn.net'
      # fetch (rather than []) so a missing profile fails with the label in
      # the message instead of a NoMethodError on nil.
      profile = $profiles.fetch(label)
      mw = MediaWiki.new(
        url: profile['url'],
        user: profile['user'],
        password: profile['password'],
        log_in: :must_log_in
      )
      # [[ProvidesCustomizationPossibilityFor::ResourceSilo level]]
      mw.originatedFromResourceSiloLabel = "FindAndLearn::Cookbook"
      mw.originatedFromResourceSiloID = "cookbook.findandlearn.net"
      # Specify a facet of MediaWiki's resources and iterate through resources
      mw.resources_from_CATEGORY("Entity") do |re|
        # Iterate through a resource's entities
        re.entities.each do |entity|
          # Store esdoc to escluster
          @esc.index(
            body: entity.esdoc,
            index: STANDARD_INDEX
          )
          $logger.info("#{entity.hasEntityURL} indexed...")
        end
      end
    end

    private

    # Drops and recreates STANDARD_INDEX, then applies the standard settings
    # and mapping while the index is closed, and finally reopens it.
    def reset_standard_index
      indices = @esc.client.indices
      # ignore: 404 keeps the reset working on a cluster where the index does
      # not exist yet (first run); without it the delete raises NotFound.
      indices.delete(index: STANDARD_INDEX, ignore: 404)
      indices.create(index: STANDARD_INDEX)
      # The settings and mapping are applied between close and open.
      indices.close(index: STANDARD_INDEX)
      indices.put_settings(
        index: STANDARD_INDEX,
        body: read_indexing_json('standard_index_settings.json')
      )
      indices.put_mapping(
        index: STANDARD_INDEX,
        type: 'doc',
        body: read_indexing_json('standard_index_mapping.json')
      )
      indices.open(index: STANDARD_INDEX)
    end

    # Reads and parses one JSON file from INDEXING_CONFIG_DIR.
    #
    # @param filename [String] file name inside INDEXING_CONFIG_DIR
    # @return [Hash, Array] the parsed JSON document
    def read_indexing_json(filename)
      JSON.parse(File.read(File.join(INDEXING_CONFIG_DIR, filename)))
    end
  end
end
# Script entry point: build the indexer against the Elasticsearch cluster
# registered under the 'es.dataspects.com' profile and run it.
# NOTE: constructing the indexer deletes and recreates its target index.
#
# fetch (rather than []) makes a missing profile fail with a KeyError naming
# the label instead of a NoMethodError on nil.
es_profile = $profiles.fetch('es.dataspects.com')
Dataspects::MediaWikiStandardIndexer.new(
  tika_url: "http://10.100.0.123:9998",
  es_url: es_profile['url']
).execute