-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathcache.py
More file actions
84 lines (67 loc) · 2.51 KB
/
cache.py
File metadata and controls
84 lines (67 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import torch
from collections import defaultdict
class Singleton(type):
    """Metaclass that gives each class using it exactly one shared instance.

    The first call to such a class constructs the instance normally; every
    later call returns that same object and ignores the arguments passed.
    """

    # Registry shared by all classes built with this metaclass:
    # maps the class object to its single instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the instance registered for ``cls``, creating it on first use.

        Args:
            *args: Positional constructor arguments (used on the first call only).
            **kwargs: Keyword constructor arguments (used on the first call only).

        Returns:
            The single instance associated with ``cls``.
        """
        registry = cls._instances
        if cls in registry:
            return registry[cls]

        # First request for this class: build it through the normal
        # type.__call__ path (__new__ + __init__) and remember the result.
        instance = super().__call__(*args, **kwargs)
        registry[cls] = instance
        return instance
class STC_CACHE(metaclass=Singleton):
    """Shared feature cache with per-layer step counters.

    Because the class uses the ``Singleton`` metaclass, every ``STC_CACHE()``
    call returns the same object, so all state described here is shared by
    every user of the class.

    Cached features are stored under four nested keys: the active
    ``self.cache_type``, the ``cache_type`` argument given to
    ``set_cache``/``get_cache``, the layer id, and the feature name.
    Step counters are stored under the active ``self.cache_type`` and the
    layer id.
    """

    # Refresh periods, in steps, read by refresh_gen / refresh_prompt /
    # refresh_cfg. This class never assigns them: the caller must set them on
    # the instance before calling those methods.
    gen_interval_steps: int
    prompt_interval_steps: int
    cfg_interval_steps: int
    # Written by reset_cache(); not read anywhere else in this class.
    prompt_length: int
    # Declared only; neither assigned nor read in this class.
    transfer_ratio: float
    # Active top-level namespace for the feature cache and the step counters.
    # The class-level default matches what reset_cache() assigns, so the cache
    # is usable right after new_instance() instead of raising AttributeError.
    cache_type: str = "no_cfg"
    __cache: defaultdict
    __step_counter: defaultdict

    @classmethod
    def new_instance(
        cls,
        chunk_idx: int = 1,
        update_token_ratio: float = 0.25,
        acc_time: int = 0,
        max_mem: int = 0,
    ) -> "STC_CACHE":
        """Configure and return the shared instance with empty storage.

        Despite the name, this does not create a second object: ``cls()`` goes
        through the singleton metaclass. The four arguments are stored as
        attributes of the same name (this class does not read them itself),
        and the feature cache and step counters are rebuilt empty.
        ``cache_type`` is left unchanged.

        Args:
            chunk_idx: Stored as ``chunk_idx``.
            update_token_ratio: Stored as ``update_token_ratio``.
            acc_time: Stored as ``acc_time``.
            max_mem: Stored as ``max_mem``.

        Returns:
            The shared ``STC_CACHE`` instance.
        """
        ins = cls()
        ins.chunk_idx = chunk_idx
        ins.acc_time = acc_time
        ins.max_mem = max_mem
        ins.update_token_ratio = update_token_ratio
        ins.init()
        return ins

    def init(self) -> None:
        """Replace the feature cache and the step counters with empty ones."""
        # Nesting: active cache_type -> cache_type argument -> layer_id
        #          -> feature_name -> {0: tensor}
        self.__cache = defaultdict(
            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
        )
        # Nesting: active cache_type -> layer_id -> number of update_step calls
        self.__step_counter = defaultdict(lambda: defaultdict(lambda: 0))

    def reset_cache(self, prompt_length: int = 0) -> None:
        """Drop all cached features and counters and start a fresh run.

        Args:
            prompt_length: Stored as ``self.prompt_length``.
        """
        # Drop our references first so the tensors can be freed, then ask the
        # CUDA caching allocator to release its unused blocks.
        self.init()
        torch.cuda.empty_cache()
        self.prompt_length = prompt_length
        self.cache_type = "no_cfg"

    def set_cache(
        self, layer_id: int, feature_name: str, features: torch.Tensor, cache_type: str
    ) -> None:
        """Store ``features``, replacing any previous entry for the same keys.

        Args:
            layer_id: Layer the feature belongs to.
            feature_name: Name of the feature within that layer.
            features: Tensor to cache (stored by reference, not copied).
            cache_type: Second-level namespace, below the active
                ``self.cache_type``.
        """
        self.__cache[self.cache_type][cache_type][layer_id][feature_name] = {
            0: features
        }

    def get_cache(
        self, layer_id: int, feature_name: str, cache_type: str
    ) -> torch.Tensor:
        """Return the tensor previously stored with ``set_cache``.

        Args:
            layer_id: Layer the feature belongs to.
            feature_name: Name of the feature within that layer.
            cache_type: Second-level namespace, below the active
                ``self.cache_type``.

        Returns:
            The cached tensor.

        Raises:
            KeyError: If nothing has been stored for these keys.
        """
        # dict.get() never triggers defaultdict's factory, so a miss no longer
        # leaves empty placeholder dicts behind at every nesting level.
        entry = (
            self.__cache.get(self.cache_type, {})
            .get(cache_type, {})
            .get(layer_id, {})
            .get(feature_name)
        )
        if entry is None:
            raise KeyError(
                f"no cached feature {feature_name!r} for layer {layer_id!r} "
                f"(cache_type={cache_type!r}, active={self.cache_type!r})"
            )
        return entry[0]

    def update_step(self, layer_id: int) -> None:
        """Increment the step counter of ``layer_id`` for the active cache type."""
        self.__step_counter[self.cache_type][layer_id] += 1

    def refresh_gen(self, layer_id: int = 0) -> bool:
        """Return True on steps 1, 1 + gen_interval_steps, 1 + 2 * gen_interval_steps, ...

        ``layer_id`` is accepted but not used.
        """
        return (self.current_step - 1) % self.gen_interval_steps == 0

    def refresh_prompt(self, layer_id: int = 0) -> bool:
        """Return True on steps 1, 1 + prompt_interval_steps, ...

        ``layer_id`` is accepted but not used.
        """
        return (self.current_step - 1) % self.prompt_interval_steps == 0

    def refresh_cfg(self, layer_id: int = 0) -> bool:
        """Return True every ``cfg_interval_steps`` steps and on each of the first five steps.

        ``layer_id`` is accepted but not used.
        """
        step = self.current_step
        # The periodic check stays first so the evaluation order, including a
        # ZeroDivisionError for a zero interval, is unchanged.
        return (step - 1) % self.cfg_interval_steps == 0 or step <= 5

    @property
    def current_step(self) -> int:
        """Largest per-layer counter for the active cache type; 1 if none exist yet."""
        return max(self.__step_counter[self.cache_type].values(), default=1)

    def __repr__(self):
        """Return the fixed banner string identifying this cache."""
        return "USE dLLMCache"