Thank you so much for developing the StringParserPEG.jl module. An "invalid index" error occurs when parsing characters in the [\u4e00-\u9fff] range. The following is the information that I have collected.
Program:
#!/usr/local/bin/julia
# Minimal reproduction script for the reported StringIndexError: it prints the
# environment, builds a one-rule grammar, and parses three sample names.
using Pkg
using InteractiveUtils
using StringParserPEG
# Return the "version" entry of the Project.toml located one directory above the
# directory of the source file that defines `m.eval`.
# NOTE(review): presumably that file lives in the package's `src/` directory, so
# `..` resolves to the package root — confirm.
pkgversion(m::Module) = Pkg.TOML.parsefile(joinpath(dirname(string(first(methods(m.eval)).file)), "..", "Project.toml"))["version"]
# Print the Julia environment report followed by the StringParserPEG version.
# `versioninfo()` is evaluated inside the interpolation; the literal `nothing`
# in the recorded output indicates that it prints its report itself and that its
# return value (`nothing`) is what ends up interpolated into the string.
println("""
$(versioninfo())
StringParserPEG Version: $(pkgversion(StringParserPEG))
""")
# Grammar with a `start` rule that delegates to `name`; `name` is a regex rule
# matching between 2 and 4 arbitrary characters (`.{2,4}`).
businessCardGrammar=Grammar("""
start => name
name => r(.{2,4})r
""")
# Parse two ASCII names and one CJK name, printing each parse result and its
# first element together with their types. Per the reported error, the input
# "王小明" is the one that raises StringIndexError (invalid index [3]).
for i in ["Ada", "Adam", "王小明"]
x=parse(businessCardGrammar, i)
println("""
typeof(x): $(typeof(x))
x: $(x)
typeof(x[1]): $(typeof(x[1]))
x[1]: $(x[1])
""")
end
Errors:
ERROR: LoadError: StringIndexError: invalid index [3], valid nearby indices [1]=>'王', [4]=>'小'
Stacktrace:
[1] string_index_err(s::String, i::Int64)
@ Base ./strings/string.jl:12
[2] getindex
@ ./strings/string.jl:263 [inlined]
[3] parse_newcachekey(grammar::Grammar, rule::StringParserPEG.RegexRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:220
[4] parse(grammar::Grammar, rule::StringParserPEG.RegexRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:60
[5] parse_newcachekey(grammar::Grammar, rule::StringParserPEG.ReferencedRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:100
[6] parse(grammar::Grammar, rule::StringParserPEG.ReferencedRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:60
[7] parse(grammar::Grammar, text::String; cache::Nothing, start::Symbol)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:42
[8] parse(grammar::Grammar, text::String)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:41
[9] top-level scope
@ /tmp/testDriveStringParserPEG.jl:11
in expression starting at /tmp/testDriveStringParserPEG.jl:10
Environment:
Julia Version 1.6.2
Commit 1b93d53fc4 (2021-07-14 15:36 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.7.0)
CPU: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-11.0.1 (ORCJIT, skylake)
Environment:
JULIA_EDITOR = atom -a
JULIA_NUM_THREADS = 4
JULIA_COPY_STACKS = 1
nothing
StringParserPEG Version: 1.1.0
References:
Unicode Character Ranges
https://jrgraphix.net/research/unicode_blocks.php
4E00 — 9FFF | CJK Unified Ideographs
Thank you so much for developing the StringParserPEG.jl module. An "invalid index" error occurs when parsing characters in the [\u4e00-\u9fff] range. The following is the information that I have collected.
Program:
#!/usr/local/bin/julia
# Minimal reproduction script for the reported StringIndexError: it prints the
# environment, builds a one-rule grammar, and parses three sample names.
using Pkg
using InteractiveUtils
using StringParserPEG
# Return the "version" entry of the Project.toml located one directory above the
# directory of the source file that defines `m.eval`.
# NOTE(review): presumably that file lives in the package's `src/` directory, so
# `..` resolves to the package root — confirm.
pkgversion(m::Module) = Pkg.TOML.parsefile(joinpath(dirname(string(first(methods(m.eval)).file)), "..", "Project.toml"))["version"]
# Print the Julia environment report followed by the StringParserPEG version.
# `versioninfo()` is evaluated inside the interpolation; the literal `nothing`
# in the recorded output indicates that it prints its report itself and that its
# return value (`nothing`) is what ends up interpolated into the string.
println("""
$(versioninfo())
StringParserPEG Version: $(pkgversion(StringParserPEG))
""")
# Grammar with a `start` rule that delegates to `name`; `name` is a regex rule
# matching between 2 and 4 arbitrary characters (`.{2,4}`).
businessCardGrammar=Grammar("""
start => name
name => r(.{2,4})r
""")
# Parse two ASCII names and one CJK name, printing each parse result and its
# first element together with their types. Per the reported error, the input
# "王小明" is the one that raises StringIndexError (invalid index [3]).
for i in ["Ada", "Adam", "王小明"]
x=parse(businessCardGrammar, i)
println("""
typeof(x): $(typeof(x))
x: $(x)
typeof(x[1]): $(typeof(x[1]))
x[1]: $(x[1])
""")
end
Errors:
ERROR: LoadError: StringIndexError: invalid index [3], valid nearby indices [1]=>'王', [4]=>'小'
Stacktrace:
[1] string_index_err(s::String, i::Int64)
@ Base ./strings/string.jl:12
[2] getindex
@ ./strings/string.jl:263 [inlined]
[3] parse_newcachekey(grammar::Grammar, rule::StringParserPEG.RegexRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:220
[4] parse(grammar::Grammar, rule::StringParserPEG.RegexRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:60
[5] parse_newcachekey(grammar::Grammar, rule::StringParserPEG.ReferencedRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:100
[6] parse(grammar::Grammar, rule::StringParserPEG.ReferencedRule, text::String, pos::Int64, cache::Nothing)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:60
[7] parse(grammar::Grammar, text::String; cache::Nothing, start::Symbol)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:42
[8] parse(grammar::Grammar, text::String)
@ StringParserPEG ~/.julia/packages/StringParserPEG/eSxgb/src/parse.jl:41
[9] top-level scope
@ /tmp/testDriveStringParserPEG.jl:11
in expression starting at /tmp/testDriveStringParserPEG.jl:10
Environment:
Julia Version 1.6.2
Commit 1b93d53fc4 (2021-07-14 15:36 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.7.0)
CPU: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-11.0.1 (ORCJIT, skylake)
Environment:
JULIA_EDITOR = atom -a
JULIA_NUM_THREADS = 4
JULIA_COPY_STACKS = 1
nothing
StringParserPEG Version: 1.1.0
References:
Unicode Character Ranges
https://jrgraphix.net/research/unicode_blocks.php
4E00 — 9FFF | CJK Unified Ideographs