# RustyGradients/Cargo.toml at master · Xzdes/RustyGradients · GitHub
# Crate identity plus the registry/discovery metadata (links, keywords, categories).
[package]
name = "rusty-gradients"
version = "0.2.0"
authors = ["Xzdes"]
categories = ["science", "wasm", "algorithms"]
documentation = "https://docs.rs/rusty-gradients"
edition = "2021"
homepage = "https://github.com/Xzdes/RustyGradients"
keywords = ["machine-learning", "deep-learning", "gpt", "transformer", "cuda"]
license = "MIT"
# Specifies which README file to use.
readme = "README.md"
repository = "https://github.com/Xzdes/RustyGradients"
description = "A full-stack deep learning framework in Rust for training and deploying Transformer models. Features multi-backend support (CPU/CUDA/Metal/WASM), 62x GPU acceleration, Safetensors serialization, and BPE tokenization."

# Library target: built both as a C-compatible dynamic library (cdylib) and as a Rust library (rlib).
[lib]
crate-type = ["cdylib", "rlib"] # cdylib is needed for WASM


# Runtime dependencies. Entries marked `optional = true` are compiled only
# when a feature enables them (see [features]).
[dependencies]
# Always-on core: n-dimensional arrays, randomness, error types, logging, timestamps.
chrono = { version = "0.4", features = ["serde"] }
log = "0.4.27"
ndarray = "0.16.1"
ndarray-rand = "0.15.0"
rand = "0.8.5"
thiserror = "2.0.16"

# Dependencies added for the WASM build.
# NOTE(review): the `js` feature of getrandom is presumably what lets `rand`
# obtain entropy in the browser — confirm against the WASM entry points.
getrandom = { version = "0.2", features = ["js"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
wasm-bindgen = "0.2"

# === Backend Modernization Dependencies ===

# Multi-threading for CPU optimization.
rayon = { version = "1.10", optional = true }

# BLAS backend for fast matrix operations (ndarray-linalg is preferred over plain BLAS).
# NOTE(review): ndarray-linalg 0.16 is believed to be built against ndarray 0.15,
# while this manifest pins ndarray 0.16.1 — confirm the two are compatible
# (ndarray-linalg 0.17 may be required).
# NOTE(review): blas-src is optional but no feature in [features] enables it via
# `dep:`; it is reachable only through its implicit `blas-src` feature — confirm intent.
blas-src = { version = "0.10", features = ["openblas"], optional = true }
ndarray-linalg = { version = "0.16", optional = true }
openblas-src = { version = "0.10", features = ["cblas", "system"], optional = true }

# Candle: multi-backend ML framework from HuggingFace.
candle-core = { version = "0.6", optional = true }
candle-nn = { version = "0.6", optional = true }

# CUDA support (note: the cudarc feature is `cuda-12000`, not `cuda-12`).
cudarc = { version = "0.11", features = ["cuda-12000", "cublas"], optional = true }

# Metal support (Apple Silicon).
metal = { version = "0.28", optional = true }

# Serialization (safetensors for efficient model storage).
memmap2 = { version = "0.9", optional = true }
safetensors = { version = "0.4", optional = true }

# Tokenization (BPE support).
tiktoken-rs = { version = "0.5", optional = true }
tokenizers = { version = "0.19", optional = true }

# HuggingFace Hub integration.
hf-hub = { version = "0.3", optional = true }

# Data loading utilities.
crossbeam = { version = "0.8", optional = true }

# Debugging aid: a Rust panic is printed to the JS console.
console_error_panic_hook = { version = "0.1.7", optional = true }

# Cargo features. A `dep:<name>` entry switches on an optional dependency
# without exposing a feature named after that dependency.
[features]
default = ["cpu"]

# Backend features
# Apple Silicon optimizations
accelerate = ["dep:candle-core"]
candle = ["dep:candle-core", "dep:candle-nn"]
cpu = ["dep:rayon"]
# BLAS acceleration (10-50x matmul speedup)
cpu-blas = ["cpu", "dep:ndarray-linalg", "dep:openblas-src"]
cuda = ["dep:candle-core", "dep:cudarc"]
# Alias for metal-backend
metal = ["metal-backend"]
metal-backend = ["dep:candle-core", "dep:metal"]

# SIMD optimizations. This flag enables no dependencies.
# NOTE(review): the original note says it is auto-enabled on supported platforms,
# but nothing in this manifest turns it on — confirm where that happens.
simd = []

# Advanced features
data-parallel = ["dep:crossbeam", "dep:rayon"]
# `hf-hub` (no `dep:` prefix) refers to the implicit feature Cargo creates for
# the optional hf-hub dependency; kept as-is so that feature name stays available.
huggingface = ["hf-hub", "serialization", "tokenization"]
serialization = ["dep:safetensors", "dep:memmap2"]
tokenization = ["dep:tokenizers", "dep:tiktoken-rs"]

# Legacy/debug features
# Keep old tensor implementation for backward compatibility
legacy = []
wasm-debug = ["dep:console_error_panic_hook"]

# Dependencies used only for tests and benchmarks, not by the library itself.
[dev-dependencies]
# Criterion benchmarking library with its `html_reports` feature enabled.
criterion = { version = "0.5", features = ["html_reports"] }

# Benchmark targets. `harness = false` disables the default libtest bench harness
# for each target (presumably so Criterion from [dev-dependencies] can supply its
# own entry point — confirm against the bench sources).
[[bench]]
name = "matmul_benchmark"
harness = false

[[bench]]
name = "blas_comparison"
harness = false

[[bench]]
name = "simd_benchmark"
harness = false

[[bench]]
name = "layernorm_benchmark"
harness = false

# NOTE(review): the two CUDA benches below declare no `required-features`;
# confirm they still build when the `cuda` feature is disabled.
[[bench]]
name = "cuda_comparison"
harness = false

[[bench]]
name = "cuda_kernels_bench"
harness = false