-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnum_of_changes.py
More file actions
39 lines (28 loc) · 1.23 KB
/
num_of_changes.py
File metadata and controls
39 lines (28 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
import re
def CountSegments(string):
    """Return the number of dash-separated segments in *string*.

    Args:
        string: A transcription whose segments are separated by "-".
            Missing values (``None``, NaN, ``pd.NA``) are accepted.

    Returns:
        The count of "-"-delimited pieces, or 0 when the value is missing
        or contains nothing but whitespace.
    """
    # Empty CSV cells are loaded by pandas as NaN (a float), which has no
    # .split(); treat any missing value as having no segments.
    if pd.isna(string):
        return 0
    # "".split("-") yields [""], which would otherwise be counted as one
    # segment even though there is nothing there.
    if not string.strip():
        return 0
    return len(string.split("-"))
def RemoveDash(string):
    """Return *string* with every "-" removed and the result lower-cased."""
    without_dashes = string.replace("-", "")
    return without_dashes.lower()
def DeletePalatal(string):
    """Normalise a transcription by applying a fixed list of rewrites.

    The rules run strictly in the order listed, each on the output of the
    previous one:

    1. ``k'``, ``g'``, ``t'``, ``p'`` and ``x'`` are each replaced by ``1``.
    2. Any apostrophe still present is removed.
    3. ``ts`` is replaced by ``7``; ``d`` followed by U+0292 by ``8``.
    4. The characters U+02B2, U+02B7, ``:`` and U+02D0 are removed.

    Args:
        string: The transcription to normalise.

    Returns:
        The rewritten string.
    """
    # Order matters: the consonant+apostrophe rules must fire before the
    # bare-apostrophe rule, and "t'" before "ts".
    rewrite_rules = (
        (r'k\'', '1'),
        (r'g\'', '1'),
        (r't\'', '1'),
        (r'p\'', '1'),
        (r'x\'', '1'),
        (r'\'', ''),
        (r'ts', '7'),
        (r'd\u0292', '8'),
        (r'[\u02B2\u02B7:\u02D0]', ''),
    )
    for pattern, replacement in rewrite_rules:
        string = re.sub(pattern, replacement, string)
    return string
def CompareStrings(str1, str2):
    """Count the positions at which *str1* and *str2* hold the same character.

    The two strings are walked in lockstep from the start; comparison stops
    at the end of the shorter one, so surplus characters in the longer
    string are never counted.

    Returns:
        The number of index positions where the characters are equal.
    """
    matches = 0
    for left, right in zip(str1, str2):
        if left == right:
            matches += 1
    return matches
# Load the table of borrowings; read_csv already returns a DataFrame.
df = pd.read_csv("bor_to_count.csv")

# Segment count of the target form. Rows with no target transcription are
# skipped and left empty (NaN) instead of crashing on the float NaN.
df["total_segments"] = df["target_ipa"].dropna().apply(CountSegments)

# Normalised copies of both transcriptions, kept out of the frame so nothing
# has to be dropped again before writing. Missing cells become "" up front:
# casting NaN with str() would yield the text "nan", which then matches
# itself (and stray "n"/"a" characters) and inflates match_count.
russian_normalised = df["russian_ipa"].fillna("").apply(DeletePalatal).apply(RemoveDash)
target_normalised = df["target_ipa"].fillna("").apply(DeletePalatal).apply(RemoveDash)

# Position-by-position count of identical characters in the two normalised
# strings; a row with a missing transcription therefore scores 0.
df["match_count"] = [
    CompareStrings(russian, target)
    for russian, target in zip(russian_normalised, target_normalised)
]

# The row index is written as the first column, as before.
df.to_csv("avar_andic_bor_with_ranks.csv")