-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfuzzy_h.py
More file actions
31 lines (23 loc) · 762 Bytes
/
fuzzy_h.py
File metadata and controls
31 lines (23 loc) · 762 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
"""This script computes a Levenshtein-based fuzzy similarity score
(fuzzywuzzy's ``fuzz.partial_ratio``) for each pair of sentences.
These scores will be part of the features for training the models."""
from fuzzywuzzy import fuzz
import pandas as pd
import numpy as np
# Input CSV holding the sentence pairs and the two output locations.
DEV_CSV = "eng_dev_with_labels.csv"
SCORES_TXT = "res_fuzz.txt"
SCORES_NPZ = "pap_leven_score_dev_lab"


def split_pair(text):
    """Split one ``Text`` cell into its two sentences.

    The two sentences are separated by a newline. Only the first newline
    splits, so any further newlines stay inside the second sentence.

    Args:
        text: The raw cell value, ``"<sentence 1>\\n<sentence 2>"``.

    Returns:
        A ``(first, second)`` tuple of strings.

    Raises:
        ValueError: If ``text`` is not a string or contains no newline.
    """
    if not isinstance(text, str):
        raise ValueError(f"expected a string, got {type(text).__name__}")
    first, separator, second = text.partition("\n")
    if not separator:
        raise ValueError("no newline separating the two sentences")
    return first, second


def score_pairs(texts, scorer=None):
    """Score every sentence pair in ``texts``.

    Args:
        texts: Iterable of cells, each holding two newline-separated
            sentences.
        scorer: Callable ``scorer(first, second)`` returning the score for
            one pair. Defaults to ``fuzz.partial_ratio``.

    Returns:
        A list with one score per input cell, in input order.

    Raises:
        ValueError: If a cell cannot be split; the message names the
            zero-based position of the offending cell.
    """
    if scorer is None:
        # Resolved lazily so the helper can be used with another scorer
        # without touching fuzzywuzzy.
        scorer = fuzz.partial_ratio

    scores = []
    for row, text in enumerate(texts):
        try:
            first, second = split_pair(text)
        except ValueError as err:
            raise ValueError(f"row {row}: {err}") from err
        scores.append(scorer(first, second))
    return scores


def main(csv_path=DEV_CSV, txt_path=SCORES_TXT, npz_path=SCORES_NPZ):
    """Compute the pair scores for one CSV and persist them.

    Reads the ``Text`` column of ``csv_path``, appends one score per line
    to ``txt_path`` and saves the scores as a compressed NumPy archive at
    ``npz_path``.
    """
    frame = pd.read_csv(csv_path)
    scores = score_pairs(frame["Text"])

    # Append mode is kept from the original script: earlier runs' scores
    # stay in the text file. The file is opened once, not once per pair.
    with open(txt_path, "a", encoding="utf-8") as out:
        out.writelines(f"{score}\n" for score in scores)

    np.savez_compressed(npz_path, np.array(scores))


if __name__ == "__main__":
    main()