# generate_class_embedding.py
# Source listing: 94 lines (75 loc), 3.09 KB.
# NOTE: repository web-page chrome and the line-number gutter (1-94) that the
# page capture placed here were not part of the file and have been dropped.
from collections import OrderedDict
from typing import Sequence
import argparse
import json
from typing import List, Sequence, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
try:
    from transformers import AutoTokenizer, AutoConfig, XLMRobertaModel
except ImportError:
    # Keep this module importable when `transformers` is not installed.
    # Every name from the import above gets a None placeholder, so a later
    # reference sees None instead of an undefined name.
    AutoTokenizer = None
    AutoConfig = None
    XLMRobertaModel = None
    # Legacy placeholder; not referenced in the visible part of this file,
    # but kept in case other code reads it.
    HFBertModel = None
class XLMRobertaLanguageBackbone(nn.Module):
    """XLM-RoBERTa text encoder followed by a linear projection head.

    The encoder architecture and tokenizer are read from a local Hugging Face
    model directory (``./xlm-roberta-base/`` or ``./xlm-roberta-large/``).
    The weights of the whole module are then loaded from the
    ``backbone.text_model.*`` entries of the checkpoint's ``'state_dict'``.

    Args:
        ckpt_path: Path to the checkpoint file. It must contain the substring
            ``'base'`` or ``'large'``, which selects the encoder variant
            (``'base'`` is tested first).
        frozen_modules: Stored as ``self.frozen_modules``; not otherwise used
            by this class.
        dropout: Accepted for interface compatibility; unused here.
        init_cfg: Accepted for interface compatibility; unused here.

    Raises:
        ValueError: If ``ckpt_path`` contains neither ``'base'`` nor
            ``'large'``.
        ImportError: If ``transformers`` could not be imported.
    """

    # (keyword searched for in ckpt_path, model name, encoder output width),
    # tested in this order.
    _VARIANTS = (
        ("base", "xlm-roberta-base", 768),
        ("large", "xlm-roberta-large", 1024),
    )
    # Prefix carried by the text-encoder entries inside the checkpoint.
    _CKPT_PREFIX = "backbone.text_model."
    # Output width of the projection head (same for both variants).
    _EMBED_DIM = 768

    def __init__(
        self,
        ckpt_path,
        frozen_modules: Sequence[str] = (),
        dropout: float = 0.0,
        init_cfg=None,
    ) -> None:
        super().__init__()

        # Resolve the variant first so an unusable path fails immediately
        # with a clear message instead of an unbound-variable error later.
        ckpt_name = str(ckpt_path)
        for keyword, variant, hidden_dim in self._VARIANTS:
            if keyword in ckpt_name:
                break
        else:
            raise ValueError(
                f"Cannot infer the XLM-RoBERTa variant from ckpt_path "
                f"{ckpt_name!r}: expected the path to contain 'base' or 'large'."
            )

        if AutoTokenizer is None:
            raise ImportError(
                "The 'transformers' package is required to build "
                "XLMRobertaLanguageBackbone."
            )

        model_name = f"./{variant}/"
        # Projects the encoder output to the shared embedding width.
        self.head = nn.Linear(hidden_dim, self._EMBED_DIM, bias=True)
        self.frozen_modules = frozen_modules

        # Only the config is read here; the weights come from ckpt_path below.
        cfg = AutoConfig.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = XLMRobertaModel(cfg)
        self.language_dim = cfg.hidden_size

        # Load the model weights.
        # SECURITY NOTE(review): weights_only=False lets torch.load unpickle
        # arbitrary Python objects. Only use checkpoints from trusted sources.
        checkpoint = torch.load(
            ckpt_path,
            map_location="cpu",
            weights_only=False,
        )
        prefix = self._CKPT_PREFIX
        # Keep only the text-encoder entries and drop their prefix so the keys
        # line up with this module's own parameter names.
        new_state_dict = OrderedDict(
            (key[len(prefix):], value)
            for key, value in checkpoint['state_dict'].items()
            if key.startswith(prefix)
        )
        # strict=True: any missing or unexpected key raises.
        msg = self.load_state_dict(new_state_dict, strict=True)
        print(msg)
        print(f"TEXT-ENCODER {variant} LOADING WEIGHTS !!!!")

    def forward(self, text: List[str]) -> torch.Tensor:
        """Encode a batch of strings into projected text features.

        Args:
            text: Strings to encode; they are tokenized together with padding.

        Returns:
            One row per input string: the encoder's last hidden state at
            sequence position 0, passed through the projection head.
        """
        tokens = self.tokenizer(text=text, return_tensors="pt", padding=True)
        # Move the token tensors to the device the encoder lives on.
        tokens = tokens.to(device=self.model.device)
        print(tokens['input_ids'].shape)
        txt_feats = self.model(**tokens)["last_hidden_state"][:, 0]
        print(txt_feats.shape)
        return self.head(txt_feats)
def main() -> None:
    """Encode class names with the text backbone and save them as a .npy file.

    Reads a JSON list from ``--classname_file`` and uses the first element of
    each entry as the class name. The names are encoded in batches, the
    embeddings are L2-normalized along the last dimension, and the resulting
    array is written with ``np.save``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--wedetect_checkpoint', type=str, default='')
    parser.add_argument('--classname_file', type=str, default='data/texts/coco_zh_class_texts.json')
    parser.add_argument('--batch_size', type=int, default=80,
                        help='number of class names encoded per forward pass')
    parser.add_argument('--output', type=str, default='coco_text_embeddings.npy',
                        help='path of the .npy file to write')
    args = parser.parse_args()
    if args.batch_size < 1:
        parser.error('--batch_size must be a positive integer')

    # Read as UTF-8 explicitly: the platform default encoding may not be able
    # to decode non-ASCII class names.
    with open(args.classname_file, encoding='utf-8') as f:
        class_entries = json.load(f)
    class_names = [entry[0] for entry in class_entries]
    if not class_names:
        raise ValueError(f'No class names found in {args.classname_file!r}')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    language_encoder = XLMRobertaLanguageBackbone(args.wedetect_checkpoint).to(device)
    # The encoder is built directly from a config and therefore starts in
    # training mode; switch to eval so dropout does not perturb the embeddings.
    language_encoder.eval()

    with torch.no_grad():
        batches = [
            language_encoder(class_names[start:start + args.batch_size])
            for start in range(0, len(class_names), args.batch_size)
        ]
        text_embeddings = torch.cat(batches)
        text_embeddings = F.normalize(text_embeddings, dim=-1).cpu().numpy()

    print(text_embeddings.shape)
    np.save(args.output, text_embeddings)


if __name__ == '__main__':
    main()