-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_LM_generated_citations.py
More file actions
112 lines (85 loc) · 4.12 KB
/
get_LM_generated_citations.py
File metadata and controls
112 lines (85 loc) · 4.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from generator import CitationGeneratorFast, CitationGenerator, BartCitationGenerator
import json
import evaluate
from tqdm import tqdm
import os
import argparse
from transformers import logging
logging.set_verbosity_error()
def _read_jsonl(path):
    """Return the parsed objects of a JSON-lines file, one per non-blank line.

    The file is opened with a context manager so the handle is released even
    if a line fails to parse. Blank lines (e.g. a trailing newline at the end
    of the file) are skipped instead of crashing ``json.loads``.
    """
    with open(path, "r", encoding="utf-8") as reader:
        return [json.loads(line) for line in reader if line.strip()]


def _select_window(start, size, total, save_path):
    """Resolve the corpus slice to process and the matching output path.

    Args:
        start: value of ``--start`` (``None`` when the flag was not given).
        size: value of ``--size`` (``None`` when the flag was not given).
        total: number of examples in the corpus.
        save_path: value of ``--save_path``.

    Returns:
        A ``(start, size, save_path)`` tuple. When ``start`` was given, the
        returned path carries a ``_<start>`` suffix so that separate runs over
        different slices write to different files.
    """
    if start is None:
        # NOTE: without --start the whole corpus is processed and any --size
        # value is ignored; this mirrors the script's existing behaviour.
        return 0, total, save_path
    if size is None:
        size = total
    return start, size, save_path + "_%d" % start


def _ensure_parent_dir(path):
    """Create the directory that will contain ``path`` if it has one.

    ``os.path.dirname`` returns ``""`` for a bare filename and
    ``os.makedirs("")`` raises ``FileNotFoundError``, so that case is skipped.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _build_generator(model_architecture, model_path):
    """Instantiate the generator class matching the architecture and path.

    ``BartCitationGenerator`` is used unless the architecture is ``"decoder"``
    and the path does not contain ``"bart"``; in that case a path ending in
    ``"-ct2"`` selects ``CitationGeneratorFast`` and anything else selects
    ``CitationGenerator``.
    """
    if model_architecture == "decoder" and "bart" not in model_path:
        if model_path.endswith("-ct2"):
            return CitationGeneratorFast(model_path)
        return CitationGenerator(model_path)
    return BartCitationGenerator(model_path)


def _generate_variants(cit_generator, example, num_beams, model_path):
    """Generate the three citation variants recorded for one example.

    ``cit_generator.generate`` is called three times with the same paper
    context: with no control attributes, with the example's citation intent,
    and with the intent plus the example's keywords joined by ``"; "``.

    Returns:
        A list of three dicts with the keys ``model``, ``generation``,
        ``given_citation_intent`` and ``given_keywords``.
    """
    # Arguments shared by all three calls, built once instead of copy-pasted.
    context = dict(
        citing_paper_title=example["citing_paper_content"]["title"],
        citing_paper_abstract=example["citing_paper_content"]["abstract"],
        cited_paper_title=example["cited_paper_content"]["title"],
        cited_paper_abstract=example["cited_paper_content"]["abstract"],
        text_before_citation=" ".join(example["text_before_citation"]),
        num_beams=num_beams,
    )
    intent = example["citation_intent"]
    keywords = example["keywords"]

    uncontrolled = cit_generator.generate(**context)
    with_intent = cit_generator.generate(intent=intent, **context)
    with_intent_and_keywords = cit_generator.generate(
        intent=intent,
        keywords="; ".join(keywords),
        **context
    )

    return [
        {
            "model": model_path,
            "generation": uncontrolled,
            "given_citation_intent": None,
            "given_keywords": None,
        },
        {
            "model": model_path,
            "generation": with_intent,
            "given_citation_intent": intent,
            "given_keywords": None,
        },
        {
            "model": model_path,
            "generation": with_intent_and_keywords,
            "given_citation_intent": intent,
            "given_keywords": keywords,
        },
    ]


def main():
    """Generate citations for a slice of a JSONL corpus and save them as JSONL.

    Each output line is the input example with an added
    ``"generated_citations"`` list holding the three generated variants.
    """
    parser = argparse.ArgumentParser()
    # The three paths are dereferenced unconditionally below, so a missing one
    # is reported by argparse instead of surfacing later as a TypeError.
    parser.add_argument("--model_path", type=str, required=True)
    parser.add_argument("--data_path", type=str, required=True)
    parser.add_argument("--save_path", type=str, required=True)
    parser.add_argument("--num_beams", type=int, default=1)
    parser.add_argument("--start", type=int, default=None)
    parser.add_argument("--size", type=int, default=None)
    parser.add_argument("--model_architecture", type=str, default="decoder")
    args = parser.parse_args()

    corpus = _read_jsonl(args.data_path)
    start, size, save_path = _select_window(
        args.start, args.size, len(corpus), args.save_path
    )
    _ensure_parent_dir(save_path)

    cit_generator = _build_generator(args.model_architecture, args.model_path)

    # The context manager closes the output file even when generation fails
    # part-way, so the lines already written are flushed to disk.
    with open(save_path, "w", encoding="utf-8") as fw:
        for example in tqdm(corpus[start:start + size]):
            example["generated_citations"] = _generate_variants(
                cit_generator, example, args.num_beams, args.model_path
            )
            fw.write(json.dumps(example) + "\n")


if __name__ == "__main__":
    main()