-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutf8.go
More file actions
84 lines (77 loc) · 2.06 KB
/
utf8.go
File metadata and controls
84 lines (77 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
package main
import "fmt"
import "math/rand"
// abs returns the absolute value of x.
//
// The most negative int has no positive counterpart, so negating it wraps
// around and the input is returned unchanged in that single case.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
// encode converts a sequence of Unicode code points (UTF-32) into its UTF-8
// byte representation.
//
// Each code point is written as one to four octets depending on its
// magnitude. Values that are not Unicode scalar values (negative numbers, the
// surrogate range U+D800..U+DFFF, and anything above U+10FFFF) have no
// well-formed UTF-8 encoding, so they are emitted as U+FFFD (REPLACEMENT
// CHARACTER) rather than as malformed bytes.
//
// The returned slice is never nil; it is empty when utf32 is empty.
func encode(utf32 []rune) []byte {
	const (
		replacement  = 0xFFFD
		surrogateMin = 0xD800
		surrogateMax = 0xDFFF
		maxCodePoint = 0x10FFFF
	)

	// Every code point produces at least one octet, so len(utf32) is a
	// cheap lower bound for the output size.
	utf8 := make([]byte, 0, len(utf32))
	for _, codePoint := range utf32 {
		if codePoint < 0 || codePoint > maxCodePoint ||
			(codePoint >= surrogateMin && codePoint <= surrogateMax) {
			codePoint = replacement
		}

		var numberOfOctets int
		switch {
		case codePoint <= 0x7F:
			numberOfOctets = 1
		case codePoint <= 0x7FF:
			numberOfOctets = 2
		case codePoint <= 0xFFFF:
			numberOfOctets = 3
		default:
			numberOfOctets = 4
		}

		if numberOfOctets == 1 {
			// ASCII is stored verbatim.
			utf8 = append(utf8, byte(codePoint))
			continue
		}

		// Lead octet: the length marker is 0xC0, 0xE0 or 0xF0 for 2, 3 or 4
		// octets. Shifting 0xF0 left within a byte drops the surplus high
		// bits and yields exactly those markers. The most significant
		// payload bits of the code point fill the remaining low bits.
		marker := byte(0xF0) << (4 - numberOfOctets)
		utf8 = append(utf8, marker|byte(codePoint>>(6*(numberOfOctets-1))))

		// Continuation octets: 10xxxxxx, six payload bits each, most
		// significant group first.
		for j := numberOfOctets - 1; j > 0; j-- {
			utf8 = append(utf8, 0x80|byte(codePoint>>(6*(j-1))&0x3F))
		}
	}
	return utf8
}
// decode converts UTF-8 encoded bytes into the sequence of Unicode code
// points (UTF-32) they represent.
//
// Well-formed input is decoded one code point per one- to four-octet
// sequence. Malformed input never panics: every octet that cannot start a
// well-formed sequence is reported as U+FFFD (REPLACEMENT CHARACTER) and
// decoding resumes at the following octet. A sequence is treated as
// malformed when
//   - the lead octet is a stray continuation octet (10xxxxxx) or 0xF8..0xFF,
//   - the input ends before the sequence is complete,
//   - a trailing octet is not a continuation octet,
//   - the value is encoded with more octets than necessary (overlong), or
//   - the value is a surrogate (U+D800..U+DFFF) or exceeds U+10FFFF.
//
// The returned slice is never nil; it is empty when utf8 is empty.
func decode(utf8 []byte) []rune {
	const (
		replacement  rune = 0xFFFD
		surrogateMin rune = 0xD800
		surrogateMax rune = 0xDFFF
		maxCodePoint rune = 0x10FFFF
	)

	// Each decoded code point consumes at least one octet, so len(utf8) is
	// an upper bound on the number of results.
	utf32 := make([]rune, 0, len(utf8))
	for i := 0; i < len(utf8); {
		lead := utf8[i]

		var (
			numberOfOctets int
			payloadMask    byte // payload bits carried by the lead octet
			minimum        rune // smallest value that needs this many octets
		)
		switch {
		case lead&0x80 == 0x00: // 0xxxxxxx
			numberOfOctets, payloadMask, minimum = 1, 0x7F, 0x00
		case lead&0xE0 == 0xC0: // 110xxxxx
			numberOfOctets, payloadMask, minimum = 2, 0x1F, 0x80
		case lead&0xF0 == 0xE0: // 1110xxxx
			numberOfOctets, payloadMask, minimum = 3, 0x0F, 0x800
		case lead&0xF8 == 0xF0: // 11110xxx
			numberOfOctets, payloadMask, minimum = 4, 0x07, 0x10000
		default:
			// Stray continuation octet or an octet that is never legal.
			utf32 = append(utf32, replacement)
			i++
			continue
		}

		// Refuse to read past the end of a truncated sequence.
		valid := i+numberOfOctets <= len(utf8)
		codePoint := rune(lead & payloadMask)
		for j := 1; valid && j < numberOfOctets; j++ {
			next := utf8[i+j]
			if next&0xC0 != 0x80 {
				valid = false
				break
			}
			// Append the six payload bits of this continuation octet.
			codePoint = codePoint<<6 | rune(next&0x3F)
		}
		if valid && (codePoint < minimum || codePoint > maxCodePoint ||
			(codePoint >= surrogateMin && codePoint <= surrogateMax)) {
			valid = false
		}

		if !valid {
			// Skip only the offending lead octet so the decoder can
			// resynchronise on whatever follows.
			utf32 = append(utf32, replacement)
			i++
			continue
		}
		utf32 = append(utf32, codePoint)
		i += numberOfOctets
	}
	return utf32
}
// randSeq returns a string made of n pseudo-random code points, each drawn
// from the half-open range [0, 1<<20) using the package-level math/rand
// source.
//
// The code points are turned into a string by Go's built-in rune-slice
// conversion, which substitutes U+FFFD for values that are not valid Unicode
// code points (such as surrogates). It panics if n is negative, because the
// backing slice cannot be allocated.
func randSeq(n int) string {
	const limit = 1 << 20 // exclusive upper bound for generated code points

	points := make([]rune, n)
	for idx := range points {
		points[idx] = rand.Int31n(limit)
	}
	return string(points)
}
// main demonstrates the hand-written codec on a random 99-code-point string.
//
// It first re-encodes the string's code points with encode and then decodes
// the string's bytes with decode. For each direction it prints the original
// next to the result, whether the two are equal, and both byte lengths.
func main() {
	s := randSeq(99)

	// Code points -> UTF-8 bytes, viewed as a string again.
	encoded := string(encode([]rune(s)))
	fmt.Printf("%s\n%s\n", s, encoded)
	fmt.Println(s == encoded)
	fmt.Printf("%d, %d\n\n", len(s), len(encoded))

	// UTF-8 bytes -> code points, viewed as a string again.
	decoded := string(decode([]byte(s)))
	fmt.Printf("%s\n%s\n", s, decoded)
	fmt.Println(s == decoded)
	fmt.Printf("%d, %d\n", len(s), len(decoded))
}