Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
163 commits
Select commit Hold shift + click to select a range
7a90cf7
Merge pull request #42 from vincent-maillou/dev
vincent-maillou Jan 20, 2025
1313c6d
Merge pull request #43 from vincent-maillou/dev
vincent-maillou Jan 23, 2025
07a2c90
Merge pull request #53 from vincent-maillou/dev
vincent-maillou Feb 18, 2025
dc33c58
Merge pull request #58 from vincent-maillou/dev
vincent-maillou Mar 19, 2025
d91866d
Merge pull request #60 from vincent-maillou/dev
vincent-maillou Mar 26, 2025
32eea2f
added timers for comm
vincent-maillou Apr 5, 2025
2dc799b
added nccl
vincent-maillou Apr 5, 2025
2d66457
removed explicit comm world call
vincent-maillou Apr 5, 2025
791daa7
added nccl synch
vincent-maillou Apr 5, 2025
f87507a
Merge pull request #62 from vincent-maillou/sc25
vincent-maillou Apr 24, 2025
9dca6a8
first implementation of trsm from cupy and scipy for left and right h…
03szust May 27, 2025
7e3884e
matmul implementation
03szust Jun 4, 2025
08a7687
used serinv_matmul once for testing
03szust Jun 4, 2025
dfef172
used serinv_solve_triangular once for testing
03szust Jun 4, 2025
0b9ceb7
updated solve_triangular_deive to newer cupy implementation
03szust Jun 4, 2025
b073708
removed serinv_solve_triangular
03szust Jun 4, 2025
0ec3284
debug messages
03szust Jun 4, 2025
1765770
print xp
03szust Jun 4, 2025
3c56e09
fixed module tuple
03szust Jun 4, 2025
bc1d67f
removed transpose
03szust Jun 4, 2025
d756ab9
print trsm func
03szust Jun 4, 2025
cf79d8d
changed print
03szust Jun 4, 2025
4554d59
changed alpha to 1
03szust Jun 4, 2025
5a84dea
removed one
03szust Jun 4, 2025
a7d4c18
changed matmul host to own implementation
03szust Jun 4, 2025
3e92f89
changed array ordering
03szust Jun 4, 2025
372ea0e
changed name of matmul function
03szust Jun 4, 2025
cc5138e
expose trans param for matmul
03szust Jun 4, 2025
7c4f97b
removed local functions from pobtas to prepare for renaming and moving
03szust Jun 10, 2025
b56e591
changed solve_triangular to trsm in pobtaf
03szust Jun 10, 2025
bdea80f
removed leftover side parameter from pobtas
03szust Jun 10, 2025
3bd5108
removed double conjugate once for testing side
03szust Jun 10, 2025
5f25de7
test to check shape and size of trsm with side
03szust Jun 10, 2025
332e1eb
forced error to test shapes
03szust Jun 10, 2025
d5c59a4
removed some debug messages
03szust Jun 10, 2025
24a7bb0
more testing
03szust Jun 10, 2025
1131500
more debug
03szust Jun 10, 2025
fda8456
swapped A and B in test
03szust Jun 10, 2025
e1b5775
swapped back
03szust Jun 10, 2025
df626e3
transpose L
03szust Jun 10, 2025
0d60090
typo
03szust Jun 10, 2025
142acdc
changed conj.T to the trans param
03szust Jun 10, 2025
bfbdcf1
actually implement first trsm
03szust Jun 10, 2025
e80022c
changed all trsm's
03szust Jun 10, 2025
2a82910
after the previous version failed, this is the second attempt
03szust Jun 10, 2025
df87182
removed side from arrow because of dim mismatch, added it to other ar…
03szust Jun 10, 2025
db39806
implemented trsm side right for all non arrow solves
03szust Jun 10, 2025
91b6d9b
imported cupy gemm to local
03szust Jun 10, 2025
d8c1d7a
added error to test
03szust Jun 10, 2025
8730201
fixed error
03szust Jun 10, 2025
e894146
implemented one provisional gemm in pobtaf
03szust Jun 10, 2025
d2c3cd4
removed error for testing
03szust Jun 10, 2025
d251adb
fixed validating array if no array was present to begin with
03szust Jun 10, 2025
58429da
fixed c1 one not existing if c was none
03szust Jun 10, 2025
af86da8
changed gemm to accommodate in-place operations
03szust Jun 10, 2025
5759621
changed first gemm to trans_b = c
03szust Jun 10, 2025
f3fb2b5
fixed different trans name
03szust Jun 10, 2025
a48f568
used alpha param on first gemm
03szust Jun 10, 2025
69da4ae
removed alpha and beta hardcoding
03szust Jun 10, 2025
4e6a81a
changed to minus
03szust Jun 10, 2025
f148ab3
inserted some debug messages
03szust Jun 10, 2025
c3de746
reverted minus
03szust Jun 10, 2025
dac0489
exposed alpha, beta and c for host gemm
03szust Jun 10, 2025
cada2cb
convert alpha to complex for cgemm and zgemm host
03szust Jun 10, 2025
13e2b79
insert dtype debug
03szust Jun 10, 2025
9d85d22
changed type debug
03szust Jun 10, 2025
ce4f0bc
changed type debug again
03szust Jun 10, 2025
fc12dc1
swapped order in function call
03szust Jun 10, 2025
7887bb0
removed debug
03szust Jun 10, 2025
1ae08d4
fully use gemm at first location
03szust Jun 10, 2025
cfd7bd7
changed check for existing c
03szust Jun 10, 2025
993c17f
fixed c not being able to be true
03szust Jun 10, 2025
c3aa505
fixed c again
03szust Jun 10, 2025
a38c39d
further c fix
03szust Jun 10, 2025
a873567
second gemm
03szust Jun 10, 2025
73eeabe
removed square matrix check in gemm that was leftover from trsm
03szust Jun 10, 2025
899d7c9
changed input validation
03szust Jun 10, 2025
e02b484
third gemm
03szust Jun 10, 2025
b3b8c22
full normal pobtaf gemm implemented
03szust Jun 10, 2025
0bf8f58
gemm in permuted pobtaf
03szust Jun 10, 2025
50312bb
rollback to just one gemm
03szust Jun 10, 2025
2cf395a
removed leftover conj t
03szust Jun 10, 2025
3b3b244
rollback to 1 gemm in permuted
03szust Jun 10, 2025
88b3a4c
next gemm in permuted
03szust Jun 10, 2025
83f7247
another gemm
03szust Jun 10, 2025
7cae093
next gemm
03szust Jun 10, 2025
3f889e1
smaller gemm
03szust Jun 10, 2025
3c85f7b
last permuted gemm
03szust Jun 10, 2025
ad936cb
first gemm in streaming
03szust Jun 10, 2025
9c4f496
second gemm streaming
03szust Jun 10, 2025
e0e5bdd
third gemm streaming
03szust Jun 10, 2025
4bb8e6e
two permuted streaming gemms
03szust Jun 10, 2025
480d982
implemented gemms for permuted streaming
03szust Jun 10, 2025
bfadd08
implemented a form of syrk/herk and added a error for testing
03szust Jun 11, 2025
4e6bbde
added another print for debug
03szust Jun 11, 2025
f8ccb36
added another print
03szust Jun 11, 2025
256e3d0
implemented syherk. sadly it's not yet useful
03szust Jun 11, 2025
9d8ea28
removed debug prints
03szust Jun 11, 2025
5885338
added test error
03szust Jun 11, 2025
1436b7f
fixed typo
03szust Jun 11, 2025
b8a5a25
moved test
03szust Jun 11, 2025
2bf27a4
attempt for using syherk
03szust Jun 11, 2025
e62d9ff
missing parenthesis
03szust Jun 11, 2025
bc52ddc
changed input for _syherk
03szust Jun 11, 2025
cf3f722
removed iteration error
03szust Jun 11, 2025
3b92a1d
attempt at implementing herk
03szust Jun 11, 2025
f7ae1cf
typo
03szust Jun 11, 2025
dc9b9c2
fixed path
03szust Jun 11, 2025
59831e1
created init files
03szust Jun 11, 2025
2c182ba
more init files
03szust Jun 11, 2025
defea4d
moved cupy part to try
03szust Jun 11, 2025
025673e
put import back
03szust Jun 11, 2025
0073cd0
renamed cupyfix_backends to backends
03szust Jun 11, 2025
a20eaec
reverted change because it didn't help
03szust Jun 11, 2025
5b4de18
debug import
03szust Jun 11, 2025
718ff3b
created new module to check if something is wrong
03szust Jun 11, 2025
25611ef
updated test module
03szust Jun 11, 2025
5a8f268
attempt to fix import problem
03szust Jun 11, 2025
01b53bc
debugging
03szust Jun 11, 2025
bd0e043
further debugging
03szust Jun 11, 2025
8334fdb
more debug
03szust Jun 11, 2025
32b61bb
try to import test module
03szust Jun 11, 2025
2c4d300
attempt at setup.py
03szust Jun 11, 2025
eb71144
changed c in cython
03szust Jun 11, 2025
7182951
reverted to big C
03szust Jun 11, 2025
884fb3a
moved backend folder
03szust Jun 11, 2025
6eef7f6
attempt to fix install
03szust Jun 11, 2025
9b677f2
fixing file path
03szust Jun 11, 2025
1047aa2
more path fixing
03szust Jun 11, 2025
aca1e0d
changed imports
03szust Jun 11, 2025
bf6d0f3
included missing file
03szust Jun 11, 2025
eadbd48
changed source order
03szust Jun 11, 2025
525dd12
added context declarations
03szust Jun 11, 2025
fdd6361
cimporting cublas
03szust Jun 11, 2025
13aee99
removed context
03szust Jun 11, 2025
e605747
try to correctly import cublas.pxd
03szust Jun 11, 2025
2918d17
import fix
03szust Jun 11, 2025
d497ffc
moving the typedef
03szust Jun 11, 2025
c4054f0
import intptr
03szust Jun 11, 2025
30b61f6
added setstream
03szust Jun 11, 2025
1e886c3
added setstream, better
03szust Jun 11, 2025
124c41f
added _setstream
03szust Jun 11, 2025
dc99cbd
changed path
03szust Jun 11, 2025
ca620c9
renamed package
03szust Jun 11, 2025
0ff2ddc
renamed package again
03szust Jun 11, 2025
5ac8805
hopefully final fix for name
03szust Jun 11, 2025
a9b771b
further path fix
03szust Jun 11, 2025
cff3642
further renaming
03szust Jun 11, 2025
cd6f2a9
continuous renaming
03szust Jun 11, 2025
35c5c07
changed location of .h files
03szust Jun 11, 2025
820e1b6
changed dirs
03szust Jun 11, 2025
8115177
bypass include conditions
03szust Jun 11, 2025
04610ca
fixed missing endif
03szust Jun 11, 2025
b3119ad
more workarounds for possibly unnecessary includes
03szust Jun 11, 2025
5398b4a
changed include to cuda
03szust Jun 11, 2025
ec1a42c
include cuda
03szust Jun 12, 2025
ff95437
include cuda lib
03szust Jun 12, 2025
d563cf4
include hopefully correct path
03szust Jun 12, 2025
51a85d7
removed empty os.join
03szust Jun 12, 2025
2d8bce7
changed imported header
03szust Jun 12, 2025
cfb1d17
removed conflicting cdefs
03szust Jun 12, 2025
2d92e23
removed call to attempt of selfmade cuda api
03szust Jun 12, 2025
50f9183
damage control
03szust Jun 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
from setuptools import setup, Extension
from Cython.Build import cythonize
import os

CONDA_PREFIX = os.environ.get("CONDA_PREFIX", "")

CUDA_INCLUDE = os.path.join(CONDA_PREFIX, "targets", "x86_64-linux", "include")



ext = Extension(
name="cupyfix_backends.cuda.libs.cublas",
sources=[
"src/serinv/cupyfix_backends/cuda/libs/cublas.pyx"
],
include_dirs=["cupyfix_backends/cuda/libs",
"cupyfix_backends/hip",
"cupyfix_backends/stub",
"cupyfix_backends/cuda",
"cupyfix_backends",
CUDA_INCLUDE
],

)

setup(
name="cupyfix_backends",
ext_modules=cythonize([ext]),
packages=["src/serinv/cupyfix_backends.cuda.libs"],
)
"""
6 changes: 3 additions & 3 deletions src/serinv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _use_nccl(comm):
return False


def _get_nccl_parameters(arr, comm, op: str):
def _get_nccl_parameters(arr, comm, rank, op: str):
"""Get the NCCL parameters for the given operation."""
if np.iscomplexobj(arr):
factor = 2
Expand All @@ -172,8 +172,8 @@ def _get_nccl_parameters(arr, comm, op: str):

if backend_flags["nccl_avail"]:
if op == "allgather":
count = (arr.size // comm.size) * factor
displacement = count * comm.rank * arr.dtype.itemsize
count = (arr.size // comm.size()) * factor
displacement = count * rank * (arr.dtype.itemsize // factor)
elif op == "allreduce":
count = arr.size * factor
displacement = 0
Expand Down
Loading
Loading