From 0ed125e96697a78ae53f81ae87dea56c98972750 Mon Sep 17 00:00:00 2001
From: shelllee <shelllee@outlook.com>
Date: Fri, 20 Aug 2021 21:22:52 +0800
Subject: [PATCH 1/3] Fixed: "UnicodeDecodeError: 'gbk' codec can't decode byte
 0xf1 in position 449: illegal multibyte sequence".

---
 analyzer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/analyzer.py b/analyzer.py
index f42448a..7bdc179 100644
--- a/analyzer.py
+++ b/analyzer.py
@@ -123,7 +123,7 @@ def parse(self, filepath):
         # File id 0 is always the current file (we store only the base file name without .txt extension).
         self._external_references[0] = self._file_index.get_id(os.path.basename(filepath[:-4]))
 
-        with open(filepath) as f:
+        with open(filepath, encoding="latin1") as f:
             line = f.readline()
             # Parse external references.
             if line == "External References\n":

From 39a97c6ae592ccf4f1fc4b985849c48f88f535cd Mon Sep 17 00:00:00 2001
From: shelllee <shelllee@outlook.com>
Date: Fri, 20 Aug 2021 21:40:15 +0800
Subject: [PATCH 2/3] Open file as latin1 instead of ignoring decode errors.

---
 analyzer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/analyzer.py b/analyzer.py
index 7bdc179..58cf622 100644
--- a/analyzer.py
+++ b/analyzer.py
@@ -142,7 +142,7 @@ def parse(self, filepath):
                         global_index = self._file_index.get_id(file)
                         self._external_references[local_index] = global_index
 
-        with open(filepath, errors='ignore') as f:
+        with open(filepath, encoding="latin1") as f:
             data = f.read()
 
         # Parse the whole file, extract all objects.

From 9e6eed0b34f4f97e0bdd47f30656f17e1fb5d7d3 Mon Sep 17 00:00:00 2001
From: shelllee <shelllee@outlook.com>
Date: Wed, 22 Sep 2021 18:49:34 +0800
Subject: [PATCH 3/3] Read as latin1 and try to decode as UTF-8 line by line.

---
 analyzer.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/analyzer.py b/analyzer.py
index 58cf622..b5b0d69 100644
--- a/analyzer.py
+++ b/analyzer.py
@@ -142,15 +142,29 @@ def parse(self, filepath):
                         global_index = self._file_index.get_id(file)
                         self._external_references[local_index] = global_index
 
+        # Read as latin1, then re-decode each line as UTF-8, dropping any invalid bytes.
+        data = ""
+
         with open(filepath, encoding="latin1") as f:
-            data = f.read()
+            line = f.readline()
+            try:
+                line = line.encode("latin1").decode("utf-8", "ignore")
+            finally:
+                data += line
+
+            while line:
+                line = f.readline()
+                try:
+                    line = line.encode("latin1").decode("utf-8", "ignore")
+                finally:
+                    data += line
 
         # Parse the whole file, extract all objects.
         regex = re.compile(r"ID: (\-?[a-f0-9]+) \(ClassID: (\d+)\) (\w+)([\s\S]*?(?=(\n{2,}ID:|$)))")
         matches = regex.findall(data)
 
         objects = {}
-    
+
         # Parse individual objects.
         for match in matches:
             try: