-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathwords_in_file.go
More file actions
88 lines (81 loc) · 1.99 KB
/
words_in_file.go
File metadata and controls
88 lines (81 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// -----------------------------------------------------------------------------
// CMDX Utilities Suite cmdx/[words_in_file.go]
// (c) balarabe@protonmail.com License: GPLv3
// -----------------------------------------------------------------------------
package main
import (
"strings"
"unicode"
fs "github.com/balacode/zr-fs"
)
// wordsInFile prints every distinct word found in a text file,
// each followed by the number of times it occurs in the file.
//
// run cmdx with 'fw' or 'file-words'
//
//	cx fw test_file.txt
//
// Accepts 1 or 2 arguments.
//
// The first argument is the name of the input file.
//
// The second argument is meant to be the output file, where the list
// of unique words will be written. NOTE: it is accepted but not used
// yet; the list is always printed through env.
//
// A word is a run of letters, digits and underscores. Only words that
// contain at least one letter, contain no digit, and are shorter than
// LongestWord characters are counted. The words are listed in map
// iteration order, which is not defined.
//
// If the file can not be read, the error is reported and nothing is listed.
func wordsInFile(cmd Command, args []string) {
	if len(args) < 1 || len(args) > 2 {
		env.Println(
			"requires <input-file> and optional <output-file> parameters",
		)
		return
	}
	var (
		filename = args[0]
		fragNo   = 0
		word     = [LongestWord]rune{}
		words    = make(map[string]int)
	)
	// State of the word currently being scanned. It lives outside the
	// chunk reader so that a word straddling two chunks is not cut in two.
	// NOTE(review): this assumes ReadFileChunks delivers consecutive,
	// non-overlapping chunks when the reader returns len(chunk) - confirm
	// against zr-fs.
	var (
		wordLen int  // characters stored in 'word' (capped at LongestWord)
		hasA    bool // the current word contains a letter
		hasD    bool // the current word contains a digit
	)
	// endWord counts the current word if it qualifies, then resets the state
	endWord := func() {
		if wordLen > 0 && wordLen < LongestWord && hasA && !hasD {
			words[string(word[:wordLen])]++
		}
		wordLen, hasA, hasD = 0, false, false
	}
	// read fragments from file, store words in map
	err := fs.ReadFileChunks(filename, FileChunkSize+LongestWord,
		func(chunk []byte) int64 {
			fragNo++
			env.Print(" ", fragNo) // progress indicator
			for _, ch := range string(chunk) {
				isA, isD := unicode.IsLetter(ch), unicode.IsDigit(ch)
				if !isA && !isD && ch != '_' {
					endWord()
					continue
				}
				hasA = hasA || isA
				hasD = hasD || isD
				// Once the buffer is full, stay inside the word without
				// storing more: a length of LongestWord is rejected by
				// endWord(), so an over-long word is skipped as a whole
				// instead of its tail being counted as a new word.
				if wordLen < LongestWord {
					word[wordLen] = ch
					wordLen++
				}
			}
			return int64(len(chunk))
		},
	)
	if err != nil {
		env.Println("failed reading", filename, ":", err)
		return
	}
	// the file may end without a delimiter after its last word
	endWord()
	// list each unique word with its count
	gap := strings.Repeat(" ", 10)
	for word, count := range words {
		env.Println(word, gap, count)
	}
}
// TODO: create Words() function in Zircon-Go lib
// TODO: create a text module or 'tstr'.
// end