-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathevaluator.old.py
More file actions
105 lines (90 loc) · 3.63 KB
/
evaluator.old.py
File metadata and controls
105 lines (90 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sys
import json
import pdfplumber
from docx import Document
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
def extract_text(file_path):
file_ext = os.path.splitext(file_path)[1].lower()
if file_ext == '.pdf':
return extract_pdf_text(file_path)
elif file_ext == '.docx':
return extract_docx_text(file_path)
elif file_ext == '.txt':
with open(file_path, 'r', encoding='utf-8') as f:
return f.read()
else:
raise ValueError("Unsupported file format")
def extract_pdf_text(pdf_path):
    """Concatenate the extracted text of every page in a PDF.

    Pages where extraction yields nothing contribute an empty string
    (pdfplumber returns None for such pages).
    """
    with pdfplumber.open(pdf_path) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)
def extract_docx_text(docx_path):
    """Return the text of a .docx document, one paragraph per line."""
    document = Document(docx_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
class ProposalEvaluator:
    """Scores research proposals criterion-by-criterion with DistilBERT.

    Each criterion is scored by feeding "<criterion>: <text prefix>" to a
    sequence-classification head and taking the positive-class softmax
    probability, then scaling it to that criterion's point weight.

    NOTE(review): the base checkpoint is not fine-tuned for this task, so
    absolute scores are essentially uncalibrated — verify against labeled
    proposals before trusting the numbers.
    """

    # Criterion key -> (prompt text fed to the model, maximum points).
    # Total possible score: 100.
    CRITERIA = {
        'relevance': ('relevance', 10),
        'originality': ('originality', 15),
        'clarity': ('clarity', 10),
        'methodology': ('methodology', 15),
        'literature': ('literature', 10),
        'team': ('team composition', 10),
        'feasibility': ('feasibility', 10),
        'budget': ('budget', 10),
        'outcomes': ('outcomes', 5),
        'sustainability': ('sustainability', 5),
    }

    def __init__(self):
        self.model_name = "distilbert-base-uncased"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name,
            num_labels=2
        )
        # Inference only — disable dropout etc.
        self.model.eval()

    def evaluate_criteria(self, text, criterion):
        """Score *text* against one criterion.

        Returns:
            float in [0, 1]: softmax probability of the positive class.
        """
        # Only the first 1000 characters are scored; longer proposals are
        # truncated again by the tokenizer to the model's max length.
        inputs = self.tokenizer(f"{criterion}: {text[:1000]}",
                                return_tensors="pt",
                                truncation=True,
                                padding=True)
        with torch.no_grad():
            outputs = self.model(**inputs)
        score = F.softmax(outputs.logits, dim=1)[0][1].item()
        return score

    def evaluate_proposal(self, text):
        """Score *text* on every criterion.

        Returns:
            dict with 'scores' (per-criterion ints plus 'total') and
            'comment' (human-readable summary string).
        """
        scores = {
            key: int(self.evaluate_criteria(text, prompt) * weight)
            for key, (prompt, weight) in self.CRITERIA.items()
        }
        scores['total'] = sum(scores.values())
        comment = self.generate_comment(scores)
        return {
            'scores': scores,
            'comment': comment
        }

    def generate_comment(self, scores):
        """Build a strengths/weaknesses summary from weighted scores.

        Bug fix: the original compared raw weighted points to the fixed
        thresholds >=7 (strength) and <=5 (weakness), so 5-point criteria
        like 'outcomes' could never register as strengths and 15-point
        criteria were judged on the wrong scale. Each score is now compared
        to its own maximum: >=70% is a strength, <=50% needs improvement.
        """
        strengths = []
        improvements = []
        for criterion, score in scores.items():
            if criterion == 'total':
                continue
            # Unknown keys default to a 10-point scale (the most common weight).
            max_points = self.CRITERIA.get(criterion, (criterion, 10))[1]
            ratio = score / max_points
            if ratio >= 0.7:
                strengths.append(f"Strong {criterion}")
            elif ratio <= 0.5:
                improvements.append(f"Could improve {criterion}")
        comment = "Evaluation Summary:\n\n"
        # Avoid a dangling "- " when a section is empty.
        comment += "Strengths:\n- " + "\n- ".join(strengths or ["None identified"]) + "\n\n"
        comment += "Areas for Improvement:\n- " + "\n- ".join(improvements or ["None identified"])
        return comment
if __name__ == "__main__":
    # CLI entry point: evaluate one proposal file, emit a JSON result on stdout.
    if len(sys.argv) != 2:
        # Diagnostics go to stderr so stdout stays clean JSON for callers.
        print("Usage: python evaluator.py <file_path>", file=sys.stderr)
        sys.exit(1)
    file_path = sys.argv[1]
    try:
        text = extract_text(file_path)
    except (OSError, ValueError) as exc:
        # Missing/unreadable file or unsupported extension — fail cleanly
        # instead of dumping a traceback.
        print(f"Error reading {file_path}: {exc}", file=sys.stderr)
        sys.exit(1)
    evaluator = ProposalEvaluator()
    result = evaluator.evaluate_proposal(text)
    print(json.dumps(result))