-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
135 lines (111 loc) · 6.45 KB
/
main.py
File metadata and controls
135 lines (111 loc) · 6.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import random
import time
import re
# Pool of common English words used as typing targets.
# NOTE: despite the name, this literal holds more than 300 entries and repeats
# several words (e.g. "room", "the"), and "I" is capitalised, so the pool is
# normalised and de-duplicated below before any words are drawn from it.
top_300_words = [
    "the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
    "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
    "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
    "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
    "so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
    "when", "make", "can", "like", "time", "no", "just", "him", "know", "take",
    "person", "into", "year", "your", "good", "some", "could", "them", "see", "other",
    "than", "then", "now", "look", "only", "come", "its", "over", "think", "also",
    "back", "after", "use", "two", "how", "our", "work", "first", "well", "way",
    "even", "new", "want", "because", "any", "these", "give", "day", "most", "us",
    "should", "home", "old", "same", "tell", "too", "show", "try", "hand", "end",
    "put", "read", "low", "must", "high", "off", "leave", "here", "eye", "words",
    "ask", "need", "side", "once", "set", "boy", "came", "along", "next", "kind",
    "went", "open", "run", "don't", "began", "grow", "turn", "seem", "help", "hard",
    "start", "might", "late", "miss", "idea", "food", "stop", "room", "air", "mother",
    "river", "car", "feet", "care", "second", "group", "book", "carry", "took", "science",
    "eat", "room", "friend", "began", "idea", "fish", "mountain", "north", "once", "base",
    "hear", "cut", "sure", "watch", "color", "face", "wood", "main", "enough", "plain",
    "girl", "usual", "young", "ready", "above", "ever", "red", "list", "though", "feel",
    "talk", "bird", "soon", "body", "dog", "family", "direct", "pose", "leave", "song",
    "measure", "door", "product", "black", "short", "numeral", "class", "wind", "question",
    "happen", "complete", "ship", "area", "half", "rock", "order", "fire", "south", "problem",
    "piece", "tell", "knew", "pass", "since", "top", "whole", "king", "size", "stop",
    "river", "space", "remember", "clear", "fact", "power", "tail", "nothing", "local", "course",
    "both", "full", "vowel", "gold", "brother", "wife", "garden", "common", "certain", "kill",
    "final", "ten", "young", "hope", "meet", "rest", "hear", "clear", "show", "list",
    "song", "teach", "page", "door", "low", "fall", "mile", "dead", "sun", "fast",
    "verb", "five", "bring", "sing", "sit", "class", "floor", "either", "correct", "sleep",
    "race", "inch", "thousand", "meet", "behind", "sight", "white", "winter", "game", "strong",
    "close", "talk", "front", "shape", "yes", "late", "help", "mark", "rain", "sentence",
    "hot", "object", "farm", "ten", "king", "draw", "left", "sleep", "hundred", "deal",
    "hour", "rock", "rule", "figure", "certain", "age", "land", "friend", "fit", "join",
    "cry", "prize", "feel", "unit", "begin", "inch", "lot", "today", "usual", "silent",
    "particular", "seat", "star", "watch", "boat", "chart", "summer", "market", "swim", "sister",
    "climb", "sleep", "shop", "race", "the", "ship", "nation", "cool", "oil", "thousand",
    "notice", "cause", "floor", "busy", "leg", "exercise", "sent", "indicate", "radio", "atom",
    "human", "history", "effect", "electric", "expect", "crop", "modern", "element", "hit", "student",
    "corner", "party",
]
# Normalise every candidate the same way the typed text is normalised later in
# this script (symbols stripped, lowercased) so targets such as "I" or "don't"
# can actually be matched, then drop repeats while keeping first-seen order.
# Without the de-duplication random.sample() could show the same word twice.
_word_pool = list(dict.fromkeys(
    re.sub(r'[^\w\s]', '', word).lower() for word in top_300_words
))
# Randomly select 15 distinct target words for this round.
subset = random.sample(_word_pool, 15)
def character_error_rate(word1, word2):
    """Return the character error rate of ``word2`` relative to ``word1``.

    The rate is the Levenshtein edit distance (insertions, deletions and
    substitutions each cost 1) between the two strings, divided by the
    length of the reference ``word1``.

    Args:
        word1: The reference (target) word.
        word2: The word that was actually typed.

    Returns:
        A float ``>= 0``. It can exceed 1.0 when ``word2`` is much longer
        than ``word1``. An empty ``word1`` is treated as having length 1, so
        the result is the raw edit distance instead of a ZeroDivisionError.
    """
    # Only the previous DP row is needed to compute the next one.
    # previous[j] is the distance between the processed prefix of word1 and word2[:j].
    previous = list(range(len(word2) + 1))
    for i, ref_char in enumerate(word1, start=1):
        # Turning word1[:i] into the empty string costs i deletions.
        current = [i]
        for j, typed_char in enumerate(word2, start=1):
            if ref_char == typed_char:
                # Matching characters add no cost over the diagonal cell.
                current.append(previous[j - 1])
            else:
                insert = current[j - 1] + 1
                delete = previous[j] + 1
                substitute = previous[j - 1] + 1
                current.append(min(insert, delete, substitute))
        previous = current
    # Guard the denominator so an empty reference cannot divide by zero.
    return previous[-1] / max(len(word1), 1)
def word_error_rate(subset, typed_para):
    """Return the mean per-word character error rate as a percentage.

    ``typed_para`` is split on whitespace and each typed word is compared,
    position by position, with the corresponding target word in ``subset``
    using ``character_error_rate``.

    Args:
        subset: List of target words, in the order they were shown.
        typed_para: The text the user typed, as a single string.

    Returns:
        The average character error rate over all words, multiplied by 100.
        Returns 0.0 when there are no target words and nothing was typed.

    Raises:
        SystemExit: If the number of typed words differs from the number of
            target words (a message is printed first).
    """
    typed_words = typed_para.split()
    if len(typed_words) != len(subset):
        print("you haven't typed the correct number of words!")
        # Raise SystemExit directly instead of calling exit(), which is only
        # injected by the site module and is not guaranteed to exist.
        raise SystemExit
    if not subset:
        # Nothing to score; avoids dividing by zero below.
        return 0.0
    total_error = sum(
        character_error_rate(target_word, typed_word)
        for target_word, typed_word in zip(subset, typed_words)
    )
    # The word counts are equal at this point, so either length is the denominator.
    return (total_error / len(subset)) * 100
# Show the target words to the user.
print("Type the following words:")
print(" ".join(subset))
# The user presses Enter (Return) to signal readiness; timing starts right after.
input("Press Enter when you are ready to start typing.")
# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments the way
# time.time() differences can.
start_time = time.perf_counter()
# Read the whole attempt as one line of input.
typed_para = input()
end_time = time.perf_counter()
time_taken = end_time - start_time
# Strip symbols from the typed text and lowercase it. Extra spaces between
# words are harmless because the text is later split on arbitrary whitespace.
typed_para = re.sub(r'[^\w\s]', '', typed_para).lower()
# Lowercase the targets as well: the typed text has just been lowercased, so a
# capitalised target (e.g. "I") would otherwise always count as an error.
# 100 - error_percent is reported as the typing accuracy.
error_percent = word_error_rate([word.lower() for word in subset], typed_para)
# If the user types nonsense, refuse to report a speed.
if error_percent > 50:
    print(f"Your word error rate {error_percent:.2f}% was quite high, and hence your accurate speed could not be computed.")
# Otherwise, report the results.
else:
    typed_words = typed_para.split()
    num_typed_words = len(typed_words)
    # Words per minute; guard against a zero elapsed time (e.g. piped input).
    speed = num_typed_words * 60 / time_taken if time_taken > 0 else 0.0
    print("******YOUR SCORE REPORT******")
    print(f"Your speed is {speed:.2f} words/min")
    print(f"The word error rate (based on characters within words) is {error_percent:.2f}%")
    print(f"The accuracy is {100-error_percent:.2f}%")