forked from tatuylonen/wiktextract
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathluatest1.py
More file actions
258 lines (221 loc) · 5.18 KB
/
luatest1.py
File metadata and controls
258 lines (221 loc) · 5.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import re
import sys
import lupa
from lupa import LuaRuntime
def filter_attribute_access(obj, attr_name, is_setting):
    """Attribute filter handed to LuaRuntime: hide underscore-prefixed names.

    Args:
        obj: the Python object whose attribute is being accessed (unused).
        attr_name: the attribute name requested from Lua.
        is_setting: whether the access is a write; only logged here.

    Returns:
        ``attr_name`` unchanged when it is a string that does not start
        with an underscore.

    Raises:
        AttributeError: for non-string names and for names starting with "_".
    """
    print("FILTER:", attr_name, is_setting)
    # ``unicode`` is not defined on Python 3, so the original check raised
    # NameError on every access; ``str`` is the text type there.
    if isinstance(attr_name, str) and not attr_name.startswith("_"):
        return attr_name
    raise AttributeError("access denied")
# The Lua runtime the script runs everything in.  Attribute access on Python
# objects from Lua goes through filter_attribute_access, and python.eval() is
# not registered in the runtime.
lua = LuaRuntime(
    attribute_filter=filter_attribute_access,
    register_eval=False,
    unpack_returned_tuples=True,
)
# XXX frame
# XXX mw.html
# XXX mw.language
# XXX mw.message
# XXX mw.site
# XXX mw.text
# XXX mw.title
# XXX mw.uri
# XXX mw.ustring
# XXX loadable: bit32
# XXX loadable: libraryUtil
# XXX loadable: luabit
# XXX mw.wikibase
# XXX mw.wikibase.lexeme
# XXX mw.wikibase.mediainfo
# XXX mw.bcmath
# XXX mw.smw
# XXX mw.ext.data
# XXX mw.ext.cargo
# XXX mw.ext.cattools
# XXX mw.ext.FlaggedRevs
# XXX mw.ext.TitleBlackList
# XXX mw.ext.articlePlaceholder
sandbox = r"""
-- Environment that loaded modules are compiled against.  new_loader reads
-- this variable each time it runs, so it uses whatever table `env` refers to
-- at the moment a module is loaded.
local env = _ENV
-- Global alias for table.unpack (the name `unpack` had in Lua 5.1).
unpack = table.unpack

--- Find a module's source file, patch it, and compile it.
-- The name is turned into a relative path (":" and "." become "/", spaces
-- become "_") and looked up in the Scribunto lualib directory and ./pages.
-- @param modname module name, e.g. "Module:fi-IPA" or "ustring.ustring"
-- @return the compiled chunk; calling it runs the module body
-- Raises an error if the module cannot be found, opened or compiled.
function new_loader(modname)
  modname = string.gsub(modname, ":", "/")
  modname = string.gsub(modname, " ", "_")
  modname = string.gsub(modname, "%.", "/")
  -- Everything below is `local` so nothing leaks into the global table.
  local path = package.searchpath(modname, "./mediawiki-extensions-Scribunto/includes/engines/LuaCommon/lualib/?.lua;./pages/?.txt") or error("MODULE NOT FOUND: " .. modname)
  print("FOUND", modname, "->", path)
  -- assert turns a failed open into a readable error instead of an
  -- "attempt to index a nil value" on the next line.
  local file = assert(io.open(path, "rb"))
  local content = file:read("*a")
  file:close()
  -- Rewrite the sequence %\[ to %[ in the source before compiling it.
  content = string.gsub(content, "%%\\%[", "%%[")
  local chunk = assert(load(content, path, "bt", env))
  return chunk
end
-- Make new_loader the only searcher, so every require() is resolved by it.
package.searchers = { new_loader }
-- Load the ustring library through that searcher and expose it as a global.
ustring = require("ustring.ustring")
--- Load a data module and return whatever its chunk returns.
-- @param module module name, resolved by new_loader
-- @return the values returned by running the module's chunk
function mw_loadData(module)
  -- `local` so the compiled chunk is not left behind as a global `fn`.
  local fn = new_loader(module)
  return fn()
end
-- Placeholder implementations of mw.* helpers.  Each one only prints a
-- marker so that a call shows up in the output.

-- Stand-in for mw.dumpObject; ignores its argument.
mw_dumpObject = function(obj)
  print("MW_DUMPOBJECT")
end

-- Stand-in for mw.log; ignores its arguments.
mw_log = function(...)
  print("MW_LOG")
end

-- Stand-in for mw.logObject; ignores its argument.
mw_logObject = function(obj)
  print("MW_LOGOBJECT")
end

-- Stand-in for mw.hash.hashValue; computes nothing and returns nothing.
mw_hash_hashValue = function(algo, value)
  print("MW_HASH_HASHVALUE")
end

-- Stand-in for mw.hash.listAlgorithms; always returns an empty list.
mw_hash_listAlgorithms = function()
  print("MW_HASH_LISTALGORITHMS")
  return {}
end
-- Cut-down `debug` library: only traceback is passed through.
local new_debug = {
  traceback = debug.traceback,
}
-- Cut-down `os` library: only the time-related functions are passed through.
local new_os = {
  clock = os.clock,
  date = os.date,
  difftime = os.difftime,
  time = os.time,
}
-- Table that becomes mw.hash, built from the two hash stand-ins.
local mw_hash = {
  listAlgorithms = mw_hash_listAlgorithms,
  hashValue = mw_hash_hashValue,
}
-- Fixed title object describing the page the test pretends to render.
local title_obj = {
  text = "TESTWORDT",
  fullPagename = "TESTWORDT",
  subpageText = "TESTWORDT",
  nsText = "",
  namespace = 0,
}
-- Table that becomes mw.title; getCurrentTitle always returns the fixed
-- title object above.
local title = {
  getCurrentTitle = function()
    return title_obj
  end,
}
-- Table that becomes mw.text; only trim, split and gsplit are provided.
mw_text = {}

--- Remove leading and trailing whitespace from a string.
-- @param val string to trim
-- @return the trimmed string
function mw_text.trim(val)
  local trimmed = val:match("^%s*(.-)%s*$")
  return trimmed
end

--- Split a string into a list of pieces.
-- @param text string to split
-- @param pattern separator, passed to ustring.find
-- @param plain passed through to ustring.find as its fourth argument
-- @return array holding every piece produced by mw_text.gsplit, in order
function mw_text.split(text, pattern, plain)
  local pieces, count = {}, 0
  for piece in mw_text.gsplit(text, pattern, plain) do
    count = count + 1
    pieces[count] = piece
  end
  return pieces
end

--- Iterator form of split: each call yields the next piece of `text`.
-- A separator that matches the empty string yields one character per call.
-- @param text string to split
-- @param pattern separator, passed to ustring.find
-- @param plain passed through to ustring.find as its fourth argument
-- @return iterator function (plus two nils for the generic `for`)
function mw_text.gsplit(text, pattern, plain)
  local pos = 1                      -- start of the next piece; nil when done
  local total = ustring.len(text)

  local function next_piece()
    if not pos then
      return
    end
    local first, last = ustring.find(text, pattern, pos, plain)
    if not first then
      -- No further separator: the rest of the text is the final piece.
      local rest = ustring.sub(text, pos)
      pos = nil
      return rest
    end
    if last < first then
      -- The separator matched the empty string: hand out one character.
      local single = ustring.sub(text, pos, first)
      if first < total then
        pos = first + 1
      else
        pos = nil
      end
      return single
    end
    -- Ordinary separator: the piece is whatever precedes it (maybe nothing).
    local piece = ''
    if first > pos then
      piece = ustring.sub(text, pos, first - 1)
    end
    pos = last + 1
    return piece
  end

  return next_piece, nil, nil
end
-- The `mw` table offered to modules, assembled from the pieces defined
-- earlier in this chunk.
mw = {
  -- sub-libraries
  hash = mw_hash,
  text = mw_text,
  title = title,
  ustring = ustring,
  -- functions
  loadData = mw_loadData,
  dumpObject = mw_dumpObject,
  log = mw_log,
  logObject = mw_logObject,
  -- Returns the global `frame` as it is at the time of the call.
  getCurrentFrame = function()
    return frame
  end,
}
-- Fixture frames for the test call: `frame` is the frame handed to the
-- module and `page_frame` is what frame:getParent() returns.

-- getTitle for the parent (page) frame.
page_getTitle = function(self)
  print("page_getTitle called")
  return "TESTWORD"
end

page_frame = {
  args = {"talossa"},
  getTitle = page_getTitle,
}

-- getTitle for the module frame.
frame_getTitle = function(self)
  print("frame_getTitle called")
  return "Module:IPA"
end

-- getParent for the module frame; always returns page_frame.
frame_getParent = function(self)
  print("frame_getParent called")
  return page_frame
end

frame = {
  getParent = frame_getParent,
  getTitle = frame_getTitle,
  args = { "nouns" },
}

-- Debug output: shows that index 0 is empty and the argument sits at 1.
print("ARGS[0]", frame.args[0])
print("ARGS[1]", frame.args[1])
-- Build the restricted environment: a fresh table holding only the names
-- listed here.  It is assigned to the existing `env` variable (no `local`),
-- so code that reads `env` later sees this table.
env = {
  -- standard library tables
  math = math,
  string = string,
  table = table,
  -- basic functions
  _VERSION = _VERSION,
  assert = assert,
  error = error,
  getmetatable = getmetatable,
  ipairs = ipairs,
  next = next,
  pairs = pairs,
  pcall = pcall,
  print = print,
  rawequal = rawequal,
  rawget = rawget,
  rawset = rawset,
  require = require,
  select = select,
  setmetatable = setmetatable,
  tonumber = tonumber,
  tostring = tostring,
  type = type,
  unpack = table.unpack,
  xpcall = xpcall,
  -- cut-down libraries and test fixtures
  debug = new_debug,
  os = new_os,
  mw = mw,
  frame = frame,
}
-- Inside the sandbox, _G refers to the sandbox table itself.
env._G = env
-- From here to the end of the chunk, global names resolve in `env`.
local _ENV = env
"""
code = """
-- Load the module under test, call its IPA entry point with the fixture
-- frame, and hand the result back to the caller of the chunk.
m = require("Module:fi-IPA")
print("LOADED", m)
if m ~= nil then
  local v = m.IPA(frame)
  print("RESULT:", v)
  return v
end
return nil
"""
#m = require("Module:et-IPA")
#print("LOADED", m)
#ipa = m.IPA(frame)
#print("IPA", ipa)
#
#sandbox.math = lua.globals().math
#sandbox.string = lua.globals().string
# etc
#setfenv(0, sandbox)
# Run the sandbox set-up and the test chunk as one Lua chunk, so the test
# chunk executes after (and in the same scope as) the set-up code.
ret = lua.execute(sandbox + code)
print("RET", ret)
if ret is None:
    # The Lua chunk returns nil when require() gives back nil.  re.sub()
    # raises TypeError on None, so stop with a clear message instead.
    sys.exit("Lua chunk returned nil; nothing to clean")
# Strip markup from the result, one substitution at a time.
ret = re.sub(r"<.*?>", "", ret)  # anything between "<" and the next ">"
# NOTE(review): as written this deletes every occurrence of the single
# character in the pattern - confirm that this is the intended character.
ret = re.sub(r" ", "", ret)
# [[Wiktionary:...]], [[Category:...]] and [[Appendix:...]] links
ret = re.sub(r"\[\[(Wiktionary|Category|Appendix):[^]]*\]\]", "", ret)
ret = re.sub(r"\(\):", "", ret)  # a literal "():"
print("CLEAN", ret)