diff --git a/AmMatrix/CMakeLists.txt b/AmMatrix/CMakeLists.txt index fc78e90..ac51a3a 100644 --- a/AmMatrix/CMakeLists.txt +++ b/AmMatrix/CMakeLists.txt @@ -1,9 +1,11 @@ project( AmMatrix ) -cmake_minimum_required( VERSION 2.8.7 ) - +cmake_minimum_required(VERSION 3.5...4.3) + #Make fast, lean and platform independent binaries.. -set(CMAKE_CXX_FLAGS "-s -O3 -fPIC -march=native -mtune=native") -set(CMAKE_C_FLAGS "-s -O3 -fPIC -march=native -mtune=native") +if(NOT MSVC) + set(CMAKE_CXX_FLAGS "-s -O3 -fPIC -march=native -mtune=native") + set(CMAKE_C_FLAGS "-s -O3 -fPIC -march=native -mtune=native") +endif() OPTION(INTEL_OPTIMIZATIONS OFF) @@ -11,6 +13,10 @@ if (INTEL_OPTIMIZATIONS) add_definitions(-DINTEL_OPTIMIZATIONS) endif(INTEL_OPTIMIZATIONS) +if(MSVC) + add_definitions(-D_USE_MATH_DEFINES) +endif() + add_library( AmMatrix STATIC collisions.c @@ -22,21 +28,26 @@ add_library( matrixMultiplicationOptimization.c matrixMultiplicationOptimization.h matrixOpenGL.c - matrixOpenGL.h + matrixOpenGL.h ocvStaging.c ocvStaging.h solveHomography.c - solveLinearSystemGJ.c + solveLinearSystemGJ.c simpleRenderer.c solids.c ) -target_link_libraries(AmMatrix m rt ) +if(NOT WIN32) + target_link_libraries(AmMatrix m rt ) +endif() -add_subdirectory(TestCPUOptimizedInstructionSet/) +add_subdirectory(TestCPUOptimizedInstructionSet/) -set_target_properties(AmMatrix PROPERTIES - DEBUG_POSTFIX "D" - ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - ) +# --------------------------------------------------------------------------- +# Remove directory overrides to let CMake manage output paths automatically. 
+# --------------------------------------------------------------------------- +# set_target_properties(AmMatrix PROPERTIES +# DEBUG_POSTFIX "D" +# ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" +# LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" +# RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" +# ) diff --git a/AmMatrix/OpenCL/CMakeLists.txt b/AmMatrix/OpenCL/CMakeLists.txt index 1299764..578e647 100644 --- a/AmMatrix/OpenCL/CMakeLists.txt +++ b/AmMatrix/OpenCL/CMakeLists.txt @@ -1,9 +1,9 @@ project( matmul ) -cmake_minimum_required( VERSION 2.8.7 ) - +cmake_minimum_required(VERSION 3.5...4.3) + #Make fast, lean and platform independent binaries.. set(CMAKE_CXX_FLAGS "-s -O3 -fPIC -march=native -mtune=native") -set(CMAKE_C_FLAGS "-s -O3 -fPIC -march=native -mtune=native") +set(CMAKE_C_FLAGS "-s -O3 -fPIC -march=native -mtune=native") OPTION(OPENCL_OPTIMIZATIONS OFF) @@ -13,7 +13,7 @@ endif(OPENCL_OPTIMIZATIONS) set_source_files_properties(mat_mul.cl PROPERTIES HEADER_FILE_ONLY TRUE) #add_library(matmul STATIC mat_mul.c) -add_executable(matmul mat_mul.c ) +add_executable(matmul mat_mul.c ) target_link_libraries(matmul m rt OpenCL ) diff --git a/AmMatrix/TestCPUOptimizedInstructionSet/CMakeLists.txt b/AmMatrix/TestCPUOptimizedInstructionSet/CMakeLists.txt index f154ab1..362f1d8 100644 --- a/AmMatrix/TestCPUOptimizedInstructionSet/CMakeLists.txt +++ b/AmMatrix/TestCPUOptimizedInstructionSet/CMakeLists.txt @@ -1,16 +1,20 @@ -project( TestCPUOptimizedInstructionSet ) -cmake_minimum_required( VERSION 2.8.7 ) +project( TestCPUOptimizedInstructionSet ) +cmake_minimum_required(VERSION 3.5...4.3) find_package(OpenCV REQUIRED) include_directories(${OpenCV_INCLUDE_DIRS}) - -add_executable(TestCPUOptimizedInstructionSet main.c ../matrix4x4Tools.c ) -target_link_libraries(TestCPUOptimizedInstructionSet rt dl m AmMatrix ) + +add_executable(TestCPUOptimizedInstructionSet main.c ../matrix4x4Tools.c ) +if (UNIX) + 
target_link_libraries(TestCPUOptimizedInstructionSet rt dl m AmMatrix ) +else() + target_link_libraries(TestCPUOptimizedInstructionSet AmMatrix ) +endif() set_target_properties(TestCPUOptimizedInstructionSet PROPERTIES DEBUG_POSTFIX "D") -add_dependencies(TestCPUOptimizedInstructionSet AmMatrix) +add_dependencies(TestCPUOptimizedInstructionSet AmMatrix) -set_target_properties(TestCPUOptimizedInstructionSet PROPERTIES +set_target_properties(TestCPUOptimizedInstructionSet PROPERTIES DEBUG_POSTFIX "D" ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" diff --git a/AmMatrix/TestCPUOptimizedInstructionSet/main.c b/AmMatrix/TestCPUOptimizedInstructionSet/main.c index fe73699..9b0fd87 100644 --- a/AmMatrix/TestCPUOptimizedInstructionSet/main.c +++ b/AmMatrix/TestCPUOptimizedInstructionSet/main.c @@ -1,6 +1,22 @@ #include #include +#ifdef _WIN32 +#include +#include +struct timespec { + long tv_sec; + long tv_nsec; +}; +#define CLOCK_MONOTONIC 0 +static int clock_gettime(int ignore, struct timespec* tv) { + uint64_t ns = (uint64_t)GetTickCount64() * 1000000; + tv->tv_sec = (long)(ns / 1000000000); + tv->tv_nsec = (long)(ns % 1000000000); + return 0; +} +#else #include +#endif #include "../matrix4x4Tools.h" @@ -33,28 +49,28 @@ int main() exit(0); } - int i=0; - + int i=0; + struct Matrix4x4OfFloats testResultOptimized={0}; struct Matrix4x4OfFloats testResultUnoptimized={0}; struct Matrix4x4OfFloats matrixA={0}; struct Matrix4x4OfFloats matrixB={0}; - + //Set matrices to identity - matrixA.m[0]=1.0; matrixA.m[5]=1.0; matrixA.m[10]=1.0; matrixA.m[15]=1.0; - matrixB.m[0]=1.0; matrixB.m[5]=1.0; matrixB.m[10]=1.0; matrixB.m[15]=1.0; + matrixA.m[0]=1.0; matrixA.m[5]=1.0; matrixA.m[10]=1.0; matrixA.m[15]=1.0; + matrixB.m[0]=1.0; matrixB.m[5]=1.0; matrixB.m[10]=1.0; matrixB.m[15]=1.0; unsigned int numberOfSamples = 100000; unsigned long unoptimizedTime = 0; unsigned long optimizedTime = 0; - + unsigned int errors = 0; for 
(i=0; i0) { fprintf(stderr,"%u errors encountered..\n",errors); } - + print4x4FMatrix("Unoptimized Result",testResultUnoptimized.m,1); print4x4FMatrix("Optimized Result",testResultOptimized.m,1); - + printf("Finished with %u samples !\n",numberOfSamples); printf("%0.4f microseconds unoptimized!\n",(float) unoptimizedTime/numberOfSamples); printf("%0.4f microseconds optimized!\n",(float) optimizedTime/numberOfSamples); diff --git a/AmMatrix/matrix4x4Tools.c b/AmMatrix/matrix4x4Tools.c index e95ae0c..68cc0b5 100644 --- a/AmMatrix/matrix4x4Tools.c +++ b/AmMatrix/matrix4x4Tools.c @@ -79,10 +79,24 @@ enum mat4x4EItem 12 13 14 15 */ +#ifdef _MSC_VER + #ifdef __cplusplus + alignas(16) const float identityMatrix4x4[16]={1.0,0.0,0.0,0.0, + 0.0,1.0,0.0,0.0, + 0.0,0.0,1.0,0.0, + 0.0,0.0,0.0,1.0}; + #else + __declspec(align(16)) const float identityMatrix4x4[16]={1.0,0.0,0.0,0.0, + 0.0,1.0,0.0,0.0, + 0.0,0.0,1.0,0.0, + 0.0,0.0,0.0,1.0}; + #endif +#else const float __attribute__((aligned(16))) identityMatrix4x4[16]={1.0,0.0,0.0,0.0, 0.0,1.0,0.0,0.0, 0.0,0.0,1.0,0.0, 0.0,0.0,0.0,1.0}; +#endif void print4x4FMatrix(const char * str , float * matrix4x4,int forcePrint) { diff --git a/AmMatrix/matrix4x4Tools.h b/AmMatrix/matrix4x4Tools.h index 6502ca9..1fe6987 100644 --- a/AmMatrix/matrix4x4Tools.h +++ b/AmMatrix/matrix4x4Tools.h @@ -65,7 +65,15 @@ struct Matrix4x4OfFloats I31 , I32 , I33 , I34 , I41 , I42 , I43 , I44 */ +#ifdef _MSC_VER + #ifdef __cplusplus + alignas(16) float m[16]; + #else + __declspec(align(16)) float m[16]; + #endif +#else float __attribute__((aligned(16))) m[16]; +#endif }; @@ -79,7 +87,15 @@ struct Vector4x1OfFloats IRC => Item Row/Column => I11, I12, I13, I14 */ +#ifdef _MSC_VER + #ifdef __cplusplus + alignas(16) float m[4]; + #else + __declspec(align(16)) float m[4]; + #endif +#else float __attribute__((aligned(16))) m[4]; +#endif }; diff --git a/CMakeLists.txt b/CMakeLists.txt index 8dc5382..aa8b067 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,18 
+90,31 @@ option(SAM3D_TENSORRT # --------------------------------------------------------------------------- # CUDA (optional – enables ONNX Runtime CUDA EP and ggml CUDA backend) # --------------------------------------------------------------------------- -find_package(CUDAToolkit QUIET) -if(CUDAToolkit_FOUND) - set(CMAKE_CUDA_ARCHITECTURES "86" CACHE STRING "CUDA architectures" FORCE) - enable_language(CUDA) - set(GGML_CUDA ON CACHE BOOL "" FORCE) - set(GGML_CUDA_F16 ON CACHE BOOL "" FORCE) - set(WITH_CUDA ON) - message(STATUS "CUDA ${CUDAToolkit_VERSION} found (sm_${CMAKE_CUDA_ARCHITECTURES})") +if(WIN32) + # On Windows, we avoid enable_language(CUDA) because it's brittle in this environment. + # We still check for CUDAToolkit to enable the C++ parts that use it. + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + set(WITH_CUDA ON) + set(GGML_CUDA OFF) # GGML CUDA needs enable_language(CUDA) + message(STATUS "CUDA ${CUDAToolkit_VERSION} found (NVCC build disabled on Windows)") + else() + set(WITH_CUDA OFF) + endif() else() - set(GGML_CUDA OFF CACHE BOOL "" FORCE) - set(WITH_CUDA OFF) - message(STATUS "CUDA not found – CPU-only build") + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + set(CMAKE_CUDA_ARCHITECTURES "86" CACHE STRING "CUDA architectures" FORCE) + enable_language(CUDA) + set(GGML_CUDA ON CACHE BOOL "" FORCE) + set(GGML_CUDA_F16 ON CACHE BOOL "" FORCE) + set(WITH_CUDA ON) + message(STATUS "CUDA ${CUDAToolkit_VERSION} found (sm_${CMAKE_CUDA_ARCHITECTURES})") + else() + set(GGML_CUDA OFF CACHE BOOL "" FORCE) + set(WITH_CUDA OFF) + message(STATUS "CUDA not found – CPU-only build") + endif() endif() # --------------------------------------------------------------------------- @@ -168,31 +181,61 @@ if(NOT DEFINED ONNX_RUNTIME_DIR OR ONNX_RUNTIME_DIR STREQUAL "") # After extraction + rename, contents live directly in _ORT_DOWNLOAD_DIR set(ONNX_RUNTIME_DIR "${_ORT_DOWNLOAD_DIR}") - if(NOT EXISTS "${_ORT_DOWNLOAD_DIR}") + if(NOT EXISTS 
"${ONNX_RUNTIME_DIR}/include/onnxruntime_cxx_api.h") message(STATUS "Downloading ONNX Runtime ${_ORT_VERSION} (${_ORT_OS}${_ORT_SUFFIX}) …") file(DOWNLOAD "${_ORT_URL}" "${CMAKE_BINARY_DIR}/${_ORT_ARCHIVE}" SHOW_PROGRESS) + file(MAKE_DIRECTORY "${ONNX_RUNTIME_DIR}") execute_process( COMMAND ${CMAKE_COMMAND} -E tar xf "${CMAKE_BINARY_DIR}/${_ORT_ARCHIVE}" - WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" - ) - file(RENAME - "${CMAKE_BINARY_DIR}/${_ORT_DIRNAME}" - "${_ORT_DOWNLOAD_DIR}" + WORKING_DIRECTORY "${ONNX_RUNTIME_DIR}" ) + # Microsoft's zip/tgz extracts into a subdir named exactly as the archive base. + if(EXISTS "${ONNX_RUNTIME_DIR}/${_ORT_DIRNAME}") + file(RENAME "${ONNX_RUNTIME_DIR}/${_ORT_DIRNAME}/include" "${ONNX_RUNTIME_DIR}/include") + file(RENAME "${ONNX_RUNTIME_DIR}/${_ORT_DIRNAME}/lib" "${ONNX_RUNTIME_DIR}/lib") + if(WIN32) + file(RENAME "${ONNX_RUNTIME_DIR}/${_ORT_DIRNAME}/bin" "${ONNX_RUNTIME_DIR}/bin") + endif() + endif() endif() endif() endif() endif() if(NOT TARGET onnxruntime::onnxruntime) - set(ORT_INCLUDE_DIRS "${ONNX_RUNTIME_DIR}/include") - set(ORT_LIB_DIR "${ONNX_RUNTIME_DIR}/lib") - find_library(ORT_LIB onnxruntime PATHS "${ORT_LIB_DIR}" NO_DEFAULT_PATH REQUIRED) + if(NOT ORT_INCLUDE_DIRS OR NOT ORT_LIB) + set(ORT_INCLUDE_DIRS "${ONNX_RUNTIME_DIR}/include") + set(ORT_LIB_DIR "${ONNX_RUNTIME_DIR}/lib") + # Ensure we look in the absolute path, not relative to build dir + find_library(ORT_LIB onnxruntime PATHS "${ORT_LIB_DIR}" NO_DEFAULT_PATH) + if(WIN32) + find_library(ORT_IMPLIB onnxruntime PATHS "${ORT_LIB_DIR}" NO_DEFAULT_PATH) + endif() + endif() add_library(onnxruntime::onnxruntime SHARED IMPORTED) - set_target_properties(onnxruntime::onnxruntime PROPERTIES - IMPORTED_LOCATION "${ORT_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${ORT_INCLUDE_DIRS}" - ) + if(WIN32) + if(ORT_IMPLIB) + set_target_properties(onnxruntime::onnxruntime PROPERTIES + IMPORTED_LOCATION "${ORT_LIB_DIR}/onnxruntime.dll" + IMPORTED_IMPLIB "${ORT_IMPLIB}" + 
INTERFACE_INCLUDE_DIRECTORIES "${ORT_INCLUDE_DIRS}" + ) + elseif(ORT_LIB) + # If we only have one library file on Windows, treat it as both for now to satisfy CMake + set_target_properties(onnxruntime::onnxruntime PROPERTIES + IMPORTED_LOCATION "${ORT_LIB}" + IMPORTED_IMPLIB "${ORT_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ORT_INCLUDE_DIRS}" + ) + endif() + else() + if(ORT_LIB) + set_target_properties(onnxruntime::onnxruntime PROPERTIES + IMPORTED_LOCATION "${ORT_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ORT_INCLUDE_DIRS}" + ) + endif() + endif() set(ORT_LIBS onnxruntime::onnxruntime) message(STATUS "ONNX Runtime at ${ONNX_RUNTIME_DIR}") endif() @@ -287,8 +330,13 @@ if(WIN32) endif() if(WITH_CUDA) - target_sources(fast_sam_3dbody PRIVATE src/mhr_lbs_cuda.cu) - target_compile_definitions(fast_sam_3dbody PUBLIC GGML_USE_CUDA FSB_CUDA) + if(NOT WIN32) + target_sources(fast_sam_3dbody PRIVATE src/mhr_lbs_cuda.cu) + endif() + target_compile_definitions(fast_sam_3dbody PUBLIC FSB_CUDA) + if(GGML_CUDA) + target_compile_definitions(fast_sam_3dbody PUBLIC GGML_USE_CUDA) + endif() target_link_libraries(fast_sam_3dbody PUBLIC CUDA::cudart) endif() @@ -323,67 +371,55 @@ target_compile_options(fast_sam_3dbody_run PRIVATE # --------------------------------------------------------------------------- # fast_sam_3dbody_render executable (OpenGL overlay renderer) # -# Linux-only: it is built on a GLX/X11 OpenGL context (glx3.c) with no Windows -# (WGL) equivalent in-tree. Windows builds are headless — the renderer is -# skipped and a notice is printed instead. +# Built on a GLX/X11 OpenGL context (glx3.c) on Linux, and WGL on Windows. 
# --------------------------------------------------------------------------- -if(NOT WIN32) - find_package(OpenGL REQUIRED) +find_package(OpenGL REQUIRED) + +if(WIN32) + set(RENDERER_SYSTEM_SRC ${GRAPHICS_ENGINE_DIR}/System/win32_gl.c) + set(GLEW_INCLUDE_DIRS "C:/MoCap/glew/glew-2.3.1/include") + set(GLEW_LIBRARIES "C:/MoCap/glew/glew-2.3.1/lib/Release/x64/glew32.lib") + set(RENDERER_LIBS OpenGL::GL ${GLEW_LIBRARIES} ${OpenCV_LIBS} ${MATH_LIB} AmMatrix) +else() find_package(GLEW REQUIRED) + set(RENDERER_SYSTEM_SRC ${GRAPHICS_ENGINE_DIR}/System/glx3.c) + set(RENDERER_LIBS OpenGL::GL GLEW::GLEW ${OpenCV_LIBS} X11 ${RT_LIB} ${MATH_LIB} AmMatrix) +endif() - add_executable(fast_sam_3dbody_render - render/fast_sam_3dbody_render.cpp - ${GRAPHICS_ENGINE_DIR}/System/glx3.c - ${GRAPHICS_ENGINE_DIR}/ModelLoader/model_loader_tri.c - ${GRAPHICS_ENGINE_DIR}/ModelLoader/model_loader_transform_joints.c - ) +add_executable(fast_sam_3dbody_render + render/fast_sam_3dbody_render.cpp + ${RENDERER_SYSTEM_SRC} + ${GRAPHICS_ENGINE_DIR}/ModelLoader/model_loader_tri.c + ${GRAPHICS_ENGINE_DIR}/ModelLoader/model_loader_transform_joints.c +) - target_compile_definitions(fast_sam_3dbody_render PRIVATE USE_GLEW) +target_compile_definitions(fast_sam_3dbody_render PRIVATE USE_GLEW) - target_include_directories(fast_sam_3dbody_render PRIVATE - src - ${GRAPHICS_ENGINE_DIR} - ${ORT_INCLUDE_DIRS} - ${OpenCV_INCLUDE_DIRS} - ${GLEW_INCLUDE_DIRS} - ) +target_include_directories(fast_sam_3dbody_render PRIVATE + src + ${GRAPHICS_ENGINE_DIR} + ${ORT_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} + ${GLEW_INCLUDE_DIRS} +) - target_link_libraries(fast_sam_3dbody_render PRIVATE - fast_sam_3dbody - OpenGL::GL - GLEW::GLEW - ${OpenCV_LIBS} - X11 - ${RT_LIB} - ${MATH_LIB} - AmMatrix - ) +target_link_libraries(fast_sam_3dbody_render PRIVATE + fast_sam_3dbody + ${RENDERER_LIBS} +) - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - target_compile_options(fast_sam_3dbody_render PRIVATE - $<$:-Wall -Wextra -Wno-unused-parameter 
-Wno-unused-variable -Wno-unused-function -g -O0> - $<$:-Wall -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -g -O0> - ) - else() - target_compile_options(fast_sam_3dbody_render PRIVATE - $<$:-Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -O3 -march=native> - $<$:-Wall -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -O2> - ) - endif() +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + target_compile_options(fast_sam_3dbody_render PRIVATE + $<$:-Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -g -O0> + $<$:-Wall -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -g -O0> + $<$:/Zi /Od> + ) else() - message(STATUS "") - message(STATUS " ┌─────────────────────────────────────────────────────────────────┐") - message(STATUS " │ Windows build: HEADLESS ONLY │") - message(STATUS " │ │") - message(STATUS " │ Live visualization is NOT supported on Windows. The OpenGL │") - message(STATUS " │ overlay renderer (fast_sam_3dbody_render) needs GLX/X11 and │") - message(STATUS " │ will not be built. 
│") - message(STATUS " │ │") - message(STATUS " │ Use these instead: │") - message(STATUS " │ fast_sam_3dbody_run – CLI pose/keypoint output │") - message(STATUS " │ offline_sam_3dbody_render – offline BVH extraction │") - message(STATUS " └─────────────────────────────────────────────────────────────────┘") - message(STATUS "") + target_compile_options(fast_sam_3dbody_render PRIVATE + $<$:-Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -O3 -march=native> + $<$:-Wall -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -O2> + $<$:/O2> + ) endif() # --------------------------------------------------------------------------- @@ -471,14 +507,15 @@ add_subdirectory(synchronization) # multi-view capture & fusion (MULTIVIEW_PLAN.md) — .calib loader so far # --------------------------------------------------------------------------- enable_testing() +add_subdirectory(AmMatrix) add_subdirectory(multiview) # --------------------------------------------------------------------------- # Summary # --------------------------------------------------------------------------- if(WIN32) - set(_VIS_STATUS "DISABLED (headless build — Windows not supported)") - set(_TARGETS_STATUS "fast_sam_3dbody (lib) fast_sam_3dbody_run (exe) offline_sam_3dbody_render (exe)") + set(_VIS_STATUS "ENABLED (WGL/win32_gl)") + set(_TARGETS_STATUS "fast_sam_3dbody (lib) fast_sam_3dbody_run (exe) fast_sam_3dbody_render (exe) offline_sam_3dbody_render (exe)") else() set(_VIS_STATUS "enabled (fast_sam_3dbody_render)") set(_TARGETS_STATUS "fast_sam_3dbody (lib) fast_sam_3dbody_run (exe) fast_sam_3dbody_render (exe) offline_sam_3dbody_render (exe)") diff --git a/GraphicsEngine/MotionCaptureLoader/calculate/bvh_to_tri_pose.c b/GraphicsEngine/MotionCaptureLoader/calculate/bvh_to_tri_pose.c index 1615839..c05ec90 100644 --- a/GraphicsEngine/MotionCaptureLoader/calculate/bvh_to_tri_pose.c +++ b/GraphicsEngine/MotionCaptureLoader/calculate/bvh_to_tri_pose.c @@ -1,5 +1,39 @@ 
#include #include +#ifdef _MSC_VER +#include +#define read _read +#define write _write +#include +typedef SSIZE_T ssize_t; + +static ssize_t getline(char **lineptr, size_t *n, FILE *stream) { + if (lineptr == NULL || n == NULL || stream == NULL) return -1; + if (*lineptr == NULL || *n == 0) { + *n = 128; + *lineptr = (char *)malloc(*n); + if (*lineptr == NULL) return -1; + } + ssize_t count = 0; + int ch; + while ((ch = fgetc(stream)) != EOF) { + if (count + 1 >= (ssize_t)*n) { + size_t new_n = *n * 2; + char *new_ptr = (char *)realloc(*lineptr, new_n); + if (new_ptr == NULL) return -1; + *lineptr = new_ptr; + *n = new_n; + } + (*lineptr)[count++] = (char)ch; + if (ch == '\n') break; + } + if (count == 0) return -1; + (*lineptr)[count] = '\0'; + return count; +} +#else +#include +#endif #include "bvh_to_tri_pose.h" #include "../../TrajectoryParser/InputParser_C.h" diff --git a/GraphicsEngine/MotionCaptureLoader/import/fromBVH.c b/GraphicsEngine/MotionCaptureLoader/import/fromBVH.c index aee937e..81aca62 100644 --- a/GraphicsEngine/MotionCaptureLoader/import/fromBVH.c +++ b/GraphicsEngine/MotionCaptureLoader/import/fromBVH.c @@ -1,4 +1,41 @@ #include "fromBVH.h" +#include +#include + +#ifdef _MSC_VER +#include +#define read _read +#define write _write +#include +typedef SSIZE_T ssize_t; + +static ssize_t getline(char **lineptr, size_t *n, FILE *stream) { + if (lineptr == NULL || n == NULL || stream == NULL) return -1; + if (*lineptr == NULL || *n == 0) { + *n = 128; + *lineptr = (char *)malloc(*n); + if (*lineptr == NULL) return -1; + } + ssize_t count = 0; + int ch; + while ((ch = fgetc(stream)) != EOF) { + if (count + 1 >= (ssize_t)*n) { + size_t new_n = *n * 2; + char *new_ptr = (char *)realloc(*lineptr, new_n); + if (new_ptr == NULL) return -1; + *lineptr = new_ptr; + *n = new_n; + } + (*lineptr)[count++] = (char)ch; + if (ch == '\n') break; + } + if (count == 0) return -1; + (*lineptr)[count] = '\0'; + return count; +} +#else +#include +#endif #include #include 
"../../TrajectoryParser/InputParser_C.h" diff --git a/GraphicsEngine/System/win32_gl.c b/GraphicsEngine/System/win32_gl.c new file mode 100644 index 0000000..77df1b4 --- /dev/null +++ b/GraphicsEngine/System/win32_gl.c @@ -0,0 +1,144 @@ +#include +#include +#include +#include "win32_gl.h" + +// Note: We use the same function names as glx3.c to avoid changing the caller. + +static HWND hwnd = NULL; +static HDC hdc = NULL; +static HGLRC hglrc = NULL; +static int close_requested = 0; + +static LRESULT CALLBACK WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) { + switch (uMsg) { + case WM_CLOSE: + close_requested = 1; + return 0; + case WM_DESTROY: + PostQuitMessage(0); + return 0; + case WM_KEYDOWN: + if (wParam == VK_ESCAPE) { + close_requested = 1; + } + return 0; + } + return DefWindowProc(hwnd, uMsg, wParam, lParam); +} + +int disableVSync() { + typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval); + PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); + if (wglSwapIntervalEXT) { + return wglSwapIntervalEXT(0); + } + return 0; +} + +int start_glx3_stuff(int WIDTH, int HEIGHT, int viewWindow, int argc, const char **argv) { + HINSTANCE hInstance = GetModuleHandle(NULL); + WNDCLASS wc = {0}; + wc.lpfnWndProc = WindowProc; + wc.hInstance = hInstance; + wc.lpszClassName = "FSB_Render_Window"; + wc.hCursor = LoadCursor(NULL, IDC_ARROW); + + if (!RegisterClass(&wc)) { + fprintf(stderr, "Failed to register window class\n"); + return 0; + } + + DWORD dwStyle = WS_OVERLAPPEDWINDOW; + if (!viewWindow) { + dwStyle = WS_POPUP; // Hidden-ish or at least no decorations + } + + RECT rect = {0, 0, WIDTH, HEIGHT}; + AdjustWindowRect(&rect, dwStyle, FALSE); + + hwnd = CreateWindowEx( + 0, "FSB_Render_Window", "SAM-3D-Body Renderer", + dwStyle, + CW_USEDEFAULT, CW_USEDEFAULT, + rect.right - rect.left, rect.bottom - rect.top, + NULL, NULL, hInstance, NULL + ); + + if (!hwnd) { + fprintf(stderr, 
"Failed to create window\n"); + return 0; + } + + if (viewWindow) { + ShowWindow(hwnd, SW_SHOW); + } else { + // We don't show the window, but we still need a context + ShowWindow(hwnd, SW_HIDE); + } + + hdc = GetDC(hwnd); + + PIXELFORMATDESCRIPTOR pfd = {0}; + pfd.nSize = sizeof(pfd); + pfd.nVersion = 1; + pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 32; + pfd.cDepthBits = 24; + pfd.cStencilBits = 8; + pfd.iLayerType = PFD_MAIN_PLANE; + + int format = ChoosePixelFormat(hdc, &pfd); + SetPixelFormat(hdc, format, &pfd); + + hglrc = wglCreateContext(hdc); + wglMakeCurrent(hdc, hglrc); + + return 1; +} + +int stop_glx3_stuff() { + if (hglrc) { + wglMakeCurrent(NULL, NULL); + wglDeleteContext(hglrc); + hglrc = NULL; + } + if (hdc) { + ReleaseDC(hwnd, hdc); + hdc = NULL; + } + if (hwnd) { + DestroyWindow(hwnd); + hwnd = NULL; + } + return 1; +} + +int glx3_endRedraw() { + if (hdc) { + SwapBuffers(hdc); + return 1; + } + return 0; +} + +int glx3_checkEvents() { + MSG msg; + while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { + if (msg.message == WM_QUIT) { + close_requested = 1; + } + TranslateMessage(&msg); + DispatchMessage(&msg); + } + return !close_requested; +} + +int glx3_should_close() { + return close_requested; +} + +void glx3_request_close() { + close_requested = 1; +} diff --git a/GraphicsEngine/System/win32_gl.h b/GraphicsEngine/System/win32_gl.h new file mode 100644 index 0000000..aae533e --- /dev/null +++ b/GraphicsEngine/System/win32_gl.h @@ -0,0 +1,55 @@ +/** @file win32_gl.h + * @brief Win32 bindings to create an OpenGL 3.x+ context and start rendering to a window + */ + +#ifndef WIN32_GL_H_INCLUDED +#define WIN32_GL_H_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif + +int disableVSync(); + +/** +* @brief create a window that can serve OpenGL draw requests +* @param WIDTH, The width of the window in pixels +* @param HEIGHT, The height of the window in pixels +* @param 
viewWindow, Setting this value to zero will make the "window" invisible +* @param argc, Number of input arguments from main +* @param argv, Pointer to an array of strings from main +* @retval 1=Success , 0=Failure +*/ +int start_glx3_stuff(int WIDTH, int HEIGHT, int viewWindow, int argc, const char **argv); + +int stop_glx3_stuff(); + +/** +* @brief After drawing everything on our OpenGL window this call swaps buffers and outputs +* @retval 1=Success , 0=Failure +*/ +int glx3_endRedraw(); + +/** +* @brief Pump pending Windows messages. +* +* Returns 1 while the GL surface is still alive. Returns 0 once a close has been requested. +* @retval 1=keep running, 0=close requested +*/ +int glx3_checkEvents(); + +/** +* @brief Returns non-zero once a clean shutdown has been requested. +*/ +int glx3_should_close(); + +/** +* @brief Programmatically request a clean shutdown. +*/ +void glx3_request_close(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/README_windows.md b/README_windows.md new file mode 100644 index 0000000..fa7a7b2 --- /dev/null +++ b/README_windows.md @@ -0,0 +1,71 @@ +# SAM3DBody-cpp: Windows Build & Usage Guide + +This document provides instructions for building and running **SAM3DBody-cpp** on Windows using Visual Studio and CMake. + +## 1. Prerequisites + +Before building, ensure you have the following installed: + +* **Visual Studio 2022** (with "Desktop development with C++" workload). +* **CMake** (version 3.18 or higher). +* **CUDA Toolkit** (Optional, recommended for GPU acceleration). +* **OpenCV**: Download and extract the Windows build (e.g., v4.x). +* **GLEW**: Required for the OpenGL renderer. + * The project expects GLEW at `C:\MoCap\glew\glew-2.3.1`. + * If installed elsewhere, update `GLEW_INCLUDE_DIRS` and `GLEW_LIBRARIES` in the `if(WIN32)` renderer section of `CMakeLists.txt`. + +## 2. 
Dependency Setup + +### Environment Variables +Set the following environment variables to help CMake find your dependencies: + +* `OpenCV_DIR`: Path to your OpenCV build directory (e.g., `C:\opencv\build`). +* Add OpenCV and CUDA `bin` directories to your system `PATH` to ensure DLLs are found at runtime. + +### Model Files +Download the models from [HuggingFace](https://huggingface.co/AmmarkoV/SAM3DBody-cpp-onnx-models) and extract them into an `onnx/` folder at the repository root. + +## 3. Building the Project + +Open **PowerShell** in the repository root and run: + +```powershell +# Create and enter build directory +mkdir build +cd build + +# Configure the project for Visual Studio 2022 x64 +# Note: Adjust paths to ONNX_RUNTIME_DIR and OpenCV_DIR to match your installation +cmake .. -G "Visual Studio 17 2022" -A x64 ` + "-DONNX_RUNTIME_DIR=C:\MoCap\onnx_rt\onnxruntime-win-x64-gpu-1.26.0" ` + "-DOpenCV_DIR=C:\MoCap\opencv-4.10.0\opencv\build" + +# Build in Release mode +cmake --build . --config Release --parallel +``` + +The executables (`fast_sam_3dbody_run.exe`, `fast_sam_3dbody_render.exe`, etc.) will be located in `build\Release\`. + +## 4. Running the Pipeline + +Use the provided PowerShell scripts to handle paths and model flags automatically. + +### Live Visualization (with 3D Overlay) +```powershell +.\scripts\video.ps1 --from path\to\your_video.mp4 +``` +* **Controls**: Use the mouse to rotate/zoom the 3D view. Press `ESC` to exit. +* **Saving**: Add `--save output.mp4` to render the visualization to a video file. + +### Headless BVH Extraction +```powershell +.\scripts\offline_video.ps1 --from path\to\your_video.mp4 --bvh output_name +``` +* This generates `output_name_0.bvh`, `output_name_1.bvh`, etc., for each tracked person. + +## 5. 
Troubleshooting + +* **Missing DLLs**: If the program fails to start (e.g., error `0xc000007b`), ensure `opencv_world*.dll`, `onnxruntime.dll`, and `glew32.dll` are in your `PATH` or copied to the same folder as the `.exe`. +* **CUDA Errors**: If you have an incompatible GPU or missing drivers, use `--cuda -1` to force CPU inference (note: this will be very slow). +* **OpenGL Errors**: Ensure your GPU drivers support OpenGL 3.3 or higher. +``` diff --git a/fast_sam_3dbody_frontend.py b/fast_sam_3dbody_frontend.py index c8fc119..2e28abb 100644 --- a/fast_sam_3dbody_frontend.py +++ b/fast_sam_3dbody_frontend.py @@ -77,14 +77,28 @@ class FsbResult(ctypes.Structure): def load_library(lib_dir: str) -> ctypes.CDLL: - lib_path = os.path.join(lib_dir, "libfast_sam_3dbody.so") + is_windows = os.name == 'nt' + lib_name = "fast_sam_3dbody.dll" if is_windows else "libfast_sam_3dbody.so" + lib_path = os.path.join(lib_dir, lib_name) if not os.path.exists(lib_path): sys.exit(f"Library not found: {lib_path}\nBuild the project first.") - # Add the lib directory to LD_LIBRARY_PATH so transitive .so deps are found - prev = os.environ.get("LD_LIBRARY_PATH", "") - ort_lib = os.path.join(lib_dir, "onnxruntime_dl", "lib") - os.environ["LD_LIBRARY_PATH"] = ":".join(filter(None, [lib_dir, ort_lib, prev])) + # Add the lib directory and dependencies to PATH/LD_LIBRARY_PATH + if is_windows: + abs_lib_dir = os.path.abspath(lib_dir) + # On Windows, we need to add paths to the DLL search path + os.environ["PATH"] = abs_lib_dir + os.pathsep + os.environ.get("PATH", "") + # Also try AddDllDirectory for Python 3.8+ if available + if hasattr(os, 'add_dll_directory'): + try: + os.add_dll_directory(abs_lib_dir) + except Exception as e: + print(f"Warning: could not add DLL directory {abs_lib_dir}: {e}") + # Add ONNX Runtime and OpenCV if we know where they are (they were copied to Release) + else: + prev = os.environ.get("LD_LIBRARY_PATH", "") + ort_lib = os.path.join(lib_dir, "onnxruntime_dl", "lib") 
+ os.environ["LD_LIBRARY_PATH"] = ":".join(filter(None, [lib_dir, ort_lib, prev])) lib = ctypes.CDLL(lib_path) diff --git a/multiview/extrinsics_test.cpp b/multiview/extrinsics_test.cpp index 20ff1d6..56b92bd 100644 --- a/multiview/extrinsics_test.cpp +++ b/multiview/extrinsics_test.cpp @@ -8,6 +8,9 @@ #include "extrinsics.h" #include +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif #include using mv::Mat4; diff --git a/render/fast_sam_3dbody_render.cpp b/render/fast_sam_3dbody_render.cpp index 7204d2b..18d0901 100644 --- a/render/fast_sam_3dbody_render.cpp +++ b/render/fast_sam_3dbody_render.cpp @@ -17,10 +17,23 @@ // GLEW must come before any other GL header. #include #include +#ifndef _WIN32 #include +#endif + +#ifdef _WIN32 +#include +#define NS_NOW() (std::chrono::duration_cast(std::chrono::steady_clock::now().time_since_epoch()).count()) +#else +#define NS_NOW() ({ struct timespec _t; clock_gettime(CLOCK_MONOTONIC,&_t); (long long)_t.tv_sec*1000000000LL + _t.tv_nsec; }) +#endif extern "C" { +#ifdef _WIN32 +#include "../GraphicsEngine/System/win32_gl.h" +#else #include "../GraphicsEngine/System/glx3.h" +#endif #include "../GraphicsEngine/ModelLoader/model_loader_tri.h" #include "../GraphicsEngine/ModelLoader/model_loader_transform_joints.h" } @@ -98,7 +111,7 @@ static GLuint compile_shader(GLenum type, const char* src) { return s; } -static GLuint link_program(const char* vs, const char* fs) +static GLuint link_program(const char* vs, const char* fs) { GLuint p = glCreateProgram(); GLuint v = compile_shader(GL_VERTEX_SHADER, vs); @@ -116,13 +129,13 @@ static GLuint link_program(const char* vs, const char* fs) // ── GPU mesh state ─────────────────────────────────────────────────────────── -struct MeshGPU +struct MeshGPU { GLuint vao, vbo_pos, vbo_norm, ebo; GLsizei n_indices; }; -static MeshGPU upload_mesh_once(const struct TRI_Model* m) +static MeshGPU upload_mesh_once(const struct TRI_Model* m) { MeshGPU g{}; g.n_indices = 
(GLsizei)m->header.numberOfIndices; @@ -330,10 +343,10 @@ static bool upload_bg_frame(BgTex& t, const cv::Mat& bgr) // ── 4x4 matrix multiply (column-major) ────────────────────────────────────── -static void mat4_mul(float dst[16], const float a[16], const float b[16]) +static void mat4_mul(float dst[16], const float a[16], const float b[16]) { for (int c = 0; c < 4; ++c) - for (int r = 0; r < 4; ++r) + for (int r = 0; r < 4; ++r) { dst[c*4+r] = 0.f; for (int k = 0; k < 4; ++k) @@ -382,7 +395,7 @@ int mat4_transpose(float * mat) } // ── Callbacks required by glx3.c ───────────────────────────────────────────── -extern "C" +extern "C" { // Called by glx3_checkEvents() on key/mouse events. int handleUserInput(int key, int x, int y) { (void)key; (void)x; (void)y; return 1; } @@ -392,7 +405,7 @@ extern "C" // ── YOLO skeleton joint pairs (COCO 17-joint order) ───────────────────────── -static const int COCO_PAIRS[][2] = +static const int COCO_PAIRS[][2] = { {0,1},{0,2},{1,3},{2,4}, // head {5,6},{5,7},{7,9},{6,8},{8,10}, // arms @@ -402,7 +415,7 @@ static const int N_COCO_PAIRS = 17; static void draw_yolo_skeleton(cv::Mat& img, const std::vector& kps, - float conf_thresh = 0.3f) + float conf_thresh = 0.3f) { if ((int)kps.size() < 51) return; // Draw limb lines first, then joint dots on top @@ -437,8 +450,9 @@ static void save_framebuffer(const std::string& path, int w, int h) { cv::Mat img(h, w, CV_8UC3, px.data()); cv::flip(img, img, 0); cv::cvtColor(img, img, cv::COLOR_RGB2BGR); - cv::imwrite(path, img); - printf("Saved: %s\n", path.c_str()); + std::vector params = {cv::IMWRITE_JPEG_QUALITY, 85}; + cv::imwrite(path, img, params); + //printf("Saved: %s\n", path.c_str()); } // ── Export the deformed body mesh to a Wavefront .obj ───────────────────────── @@ -771,6 +785,7 @@ int main(int argc, const char** argv) { // ── Shaders ─────────────────────────────────────────────────────────────── GLuint prog_quad = link_program(QUAD_VERT, QUAD_FRAG); + if (!prog_quad) { 
fprintf(stderr, "Failed to link quad program\n"); return 1; } std::string mesh_vert_src = load_text_file(vert_path.c_str()); std::string mesh_frag_src = load_text_file(frag_path.c_str()); @@ -897,7 +912,6 @@ int main(int argc, const char** argv) { glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // ── Render loop ─────────────────────────────────────────────────────────── -#define NS_NOW() ({ struct timespec _t; clock_gettime(CLOCK_MONOTONIC,&_t); (long long)_t.tv_sec*1000000000LL + _t.tv_nsec; }) long long t_last_frame = NS_NOW(); long long t_last_grab = NS_NOW(); // wall-clock of the last frame we pulled (live sync) long long t_session_start = NS_NOW(); // for measuring the effective live frame rate @@ -912,7 +926,7 @@ int main(int argc, const char** argv) { const int frame_stop = (max_frames > 0) ? start_frame + max_frames : -1; cv::Mat frame; - while (glx3_checkEvents()) + while (glx3_checkEvents()) { if (is_image) { @@ -938,7 +952,10 @@ int main(int argc, const char** argv) { } cap >> frame; // newest available frame t_last_grab = NS_NOW(); - if (frame.empty()) break; + if (frame.empty()) { + fprintf(stderr, "\n[cap] end of stream reached at frame %d\n", frame_index); + break; + } } // Inference @@ -1004,13 +1021,11 @@ int main(int argc, const char** argv) { draw_yolo_skeleton(vis, r.keypoints_yolo); } - // Upload background. Failures are non-fatal: skip the background - // quad for this frame and let the next frame retry. Killing the - // process here is the regression that produced truncated mp4s - // (e.g. matrix_rendered.mp4 stopping at 14s with audio continuing - // for the full 90s) — a single bad frame should not take down a - // long render. + // Upload background. 
bool bg_ok = upload_bg_frame(bg, vis); + if (!bg_ok) { + fprintf(stderr, "\n[GL] background upload failed at frame %d\n", frame_index); + } glClearColor(0.f, 0.f, 0.f, 1.f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); @@ -1236,6 +1251,7 @@ int main(int argc, const char** argv) { t_last_frame = t_now; fps_ema = (fps_ema == 0.0) ? (1000.0 / frame_ms) : (0.9 * fps_ema + 0.1 * (1000.0 / frame_ms)); + fprintf(stderr, "\r FPS: %5.1f Latency: %4.0f ms Subjects: %d ", fps_ema, latency_ms, (int)results.size()); fflush(stderr); diff --git a/ros_demo_webcam.py b/ros_demo_webcam.py index b8b9d4f..58c9279 100644 --- a/ros_demo_webcam.py +++ b/ros_demo_webcam.py @@ -175,13 +175,27 @@ class FsbResult(ctypes.Structure): def load_library(lib_dir: str) -> ctypes.CDLL: - lib_path = os.path.join(lib_dir, "libfast_sam_3dbody.so") + is_windows = os.name == 'nt' + lib_name = "fast_sam_3dbody.dll" if is_windows else "libfast_sam_3dbody.so" + lib_path = os.path.join(lib_dir, lib_name) if not os.path.exists(lib_path): sys.exit(f"Library not found: {lib_path}\nBuild the project first.") - prev = os.environ.get("LD_LIBRARY_PATH", "") - ort_lib = os.path.join(lib_dir, "onnxruntime_dl", "lib") - os.environ["LD_LIBRARY_PATH"] = ":".join(filter(None, [lib_dir, ort_lib, prev])) + # Add the lib directory and dependencies to PATH/LD_LIBRARY_PATH + if is_windows: + abs_lib_dir = os.path.abspath(lib_dir) + # On Windows, we need to add paths to the DLL search path + os.environ["PATH"] = abs_lib_dir + os.pathsep + os.environ.get("PATH", "") + # Also try AddDllDirectory for Python 3.8+ if available + if hasattr(os, 'add_dll_directory'): + try: + os.add_dll_directory(abs_lib_dir) + except Exception as e: + print(f"Warning: could not add DLL directory {abs_lib_dir}: {e}") + else: + prev = os.environ.get("LD_LIBRARY_PATH", "") + ort_lib = os.path.join(lib_dir, "onnxruntime_dl", "lib") + os.environ["LD_LIBRARY_PATH"] = ":".join(filter(None, [lib_dir, ort_lib, prev])) lib = ctypes.CDLL(lib_path) 
lib.fsb_create.restype = ctypes.c_void_p diff --git a/scripts/offline_video.ps1 b/scripts/offline_video.ps1 new file mode 100644 index 0000000..f5ace57 --- /dev/null +++ b/scripts/offline_video.ps1 @@ -0,0 +1,107 @@ +# scripts/offline_video.ps1 +# +# PowerShell entry point for the offline multi-pass BVH extractor. +# Windows-compatible version of scripts/offline_video.sh. +# +# Usage: +# .\scripts\offline_video.ps1 --from clip.mp4 --bvh out.bvh [--smoothing ...] +# .\scripts\offline_video.ps1 clip.mp4 --bvh out.bvh --save vis.mp4 + +$PSScriptRoot = Split-Path -Parent $MyInvocation.MyCommand.Definition +Set-Location "$PSScriptRoot\.." + +# Locate the input video +$FROM_SRC = "" +$OFFLINE_ARGS = @() +$SAVE_REQUESTED = $false +$SAVE_OUTPUT = "" + +for ($i = 0; $i -lt $args.Count; $i++) { + $a = $args[$i] + if ($a -eq "--from") { + if ($i + 1 -lt $args.Count) { + $FROM_SRC = $args[$i + 1] + $i++ + } + } + elseif ($a -eq "--save") { + $SAVE_REQUESTED = $true + if ($i + 1 -lt $args.Count -and -not $args[$i + 1].StartsWith("-")) { + $SAVE_OUTPUT = $args[$i + 1] + $i++ + } + } + else { + # Positional fallback for the first argument if it doesn't start with - + if ($i -eq 0 -and -not $a.StartsWith("-") -and $FROM_SRC -eq "") { + $FROM_SRC = $a + } + else { + $OFFLINE_ARGS += $a + } + } +} + +if ([string]::IsNullOrEmpty($FROM_SRC)) { + Write-Error "Usage: .\scripts\offline_video.ps1 --from VIDEO --bvh OUT.bvh [options] [--save VIS.mp4]" + Write-Error " (or positional: .\scripts\offline_video.ps1 VIDEO --bvh OUT.bvh ...)" + exit 2 +} + +if (-not (Test-Path $FROM_SRC)) { + Write-Error "Input video not found: $FROM_SRC" + exit 2 +} + +# Locate binary +$BIN = "" +$BIN_PATHS = @( + "build\Release\offline_sam_3dbody_render.exe", + "build\Debug\offline_sam_3dbody_render.exe", + "build\offline_sam_3dbody_render.exe" +) + +foreach ($path in $BIN_PATHS) { + if (Test-Path $path) { + $BIN = $path + break + } +} + +if ($BIN -eq "") { + Write-Error "Could not find 
offline_sam_3dbody_render.exe in build directories." + exit 1 +} + +$FIXED_FLAGS = @( + "--onnx-dir", ".\onnx", + "--gguf", ".\onnx\pipeline.gguf", + "--yolo", ".\onnx\yolo.onnx" +) + +Write-Host "Running: $BIN --from $FROM_SRC $($FIXED_FLAGS -join ' ') $($OFFLINE_ARGS -join ' ')" +& $BIN --from $FROM_SRC @FIXED_FLAGS @OFFLINE_ARGS +$OFFLINE_EXIT = $LASTEXITCODE + +if ($OFFLINE_EXIT -eq $null) { + # If binary is missing or can't be executed + $OFFLINE_EXIT = 1 +} + +if ($OFFLINE_EXIT -ne 0) { + Write-Error "Offline binary exited with code $OFFLINE_EXIT - skipping rendered-mp4 step." + exit $OFFLINE_EXIT +} + +if ($SAVE_REQUESTED) { + Write-Host "" + Write-Host ("-" * 66) + Write-Host " offline BVH done - now rendering visualisation mp4 via video.ps1" + Write-Host ("-" * 66) + + if (-not [string]::IsNullOrEmpty($SAVE_OUTPUT)) { + & "$PSScriptRoot\video.ps1" --from "$FROM_SRC" --save "$SAVE_OUTPUT" + } else { + & "$PSScriptRoot\video.ps1" --from "$FROM_SRC" --save + } +} diff --git a/scripts/video.ps1 b/scripts/video.ps1 new file mode 100644 index 0000000..592e995 --- /dev/null +++ b/scripts/video.ps1 @@ -0,0 +1,157 @@ +# scripts/video.ps1 +# +# PowerShell entry point for the live visualization renderer. +# Windows-compatible version of scripts/video.sh. +# +# Usage: +# .\scripts\video.ps1 --from clip.mp4 [--save vis.mp4] + +$PSScriptRoot = Split-Path -Parent $MyInvocation.MyCommand.Definition +Set-Location "$PSScriptRoot\.." 
+
+# Parse arguments: --save [FILE] and --from FILE are consumed here; everything else is forwarded to the renderer
+$SAVE_REQUESTED = $false
+$SAVE_OUTPUT = ""
+$FROM_SRC = ""
+$FORWARD_ARGS = @()
+
+for ($i = 0; $i -lt $args.Count; $i++) {
+    $a = $args[$i]
+    if ($a -eq "--save") {
+        $SAVE_REQUESTED = $true
+        if ($i + 1 -lt $args.Count -and -not $args[$i + 1].StartsWith("-")) {
+            $SAVE_OUTPUT = $args[$i + 1]
+            $i++
+        }
+    }
+    elseif ($a -eq "--from") {
+        if ($i + 1 -lt $args.Count) {
+            $FROM_SRC = $args[$i + 1]
+            $i++
+        }
+    }
+    else {
+        # Positional fallback: a leading non-option argument is taken as the input source
+        if ($i -eq 0 -and -not $a.StartsWith("-") -and $FROM_SRC -eq "") {
+            $FROM_SRC = $a
+        }
+        else {
+            $FORWARD_ARGS += $a
+        }
+    }
+}
+
+if ([string]::IsNullOrEmpty($FROM_SRC)) {
+    Write-Host "Usage: .\scripts\video.ps1 --from VIDEO [--save VIS.mp4] [options]"
+    exit 2
+}
+
+# Locate binary (first existing candidate wins: Release, then Debug, then flat build dir)
+$BIN = ""
+$BIN_PATHS = @(
+    "build\Release\fast_sam_3dbody_render.exe",
+    "build\Debug\fast_sam_3dbody_render.exe",
+    "build\fast_sam_3dbody_render.exe"
+)
+
+foreach ($path in $BIN_PATHS) {
+    if (Test-Path $path) {
+        $BIN = $path
+        break
+    }
+}
+
+if ($BIN -eq "") {
+    Write-Error "Could not find fast_sam_3dbody_render.exe in build directories. Build the project first."
+    exit 1
+}
+
+$FIXED_FLAGS = @(
+    "--onnx-dir", ".\onnx",
+    "--gguf", ".\onnx\pipeline.gguf",
+    "--yolo", ".\onnx\yolo.onnx",
+    "--mesh", ".\body_mesh.tri",
+    "--lbs", "onnx\body_model.lbs"
+)
+
+if (-not $SAVE_REQUESTED) {
+    # Normal live mode: run the renderer interactively and propagate its exit code
+    & $BIN --from $FROM_SRC @FIXED_FLAGS @FORWARD_ARGS
+    exit $LASTEXITCODE
+}
+
+# Save-to-file mode: derive a default output name when --save was given without a file
+if ([string]::IsNullOrEmpty($SAVE_OUTPUT)) {
+    if (Test-Path $FROM_SRC) {
+        $base = Split-Path -Leaf $FROM_SRC
+        $stem = [System.IO.Path]::GetFileNameWithoutExtension($base)
+        $SAVE_OUTPUT = "${stem}_rendered.mp4"
+    } else {
+        $SAVE_OUTPUT = "livelastRun3DHiRes.mp4"
+    }
+    Write-Host "Output: $SAVE_OUTPUT"
+}
+
+# Create a uniquely named temporary directory for the JPEG frames
+$TMP_DIR = [System.IO.Path]::Combine([System.IO.Path]::GetTempPath(), "fsb_frames_" + [System.Guid]::NewGuid().ToString().Substring(0,8))
+New-Item -ItemType Directory -Path $TMP_DIR | Out-Null
+$FRAME_PREFIX = [System.IO.Path]::Combine($TMP_DIR, "colorFrame_0_")
+
+Write-Host "Rendering frames to $TMP_DIR ..."
+& $BIN --from $FROM_SRC @FIXED_FLAGS @FORWARD_ARGS --headless --save-frames "$FRAME_PREFIX"
+$RENDER_EXIT = $LASTEXITCODE
+
+$ACTUAL_FRAMES = (Get-ChildItem "${FRAME_PREFIX}*.jpg" -ErrorAction SilentlyContinue).Count
+if ($ACTUAL_FRAMES -eq 0) {
+    Write-Error "Renderer did not produce any frames. Exit code: $RENDER_EXIT"
+    Remove-Item -Recurse -Force $TMP_DIR
+    exit $(if ($RENDER_EXIT) { $RENDER_EXIT } else { 1 })  # never report success (0/null) when nothing was rendered
+}
+
+# Probe FPS from source (falls back to 30 when the source is not a file or ffprobe gives no usable rate)
+$FPS = 30
+if (Test-Path $FROM_SRC) {
+    $ffprobe_out = ffprobe -v error -select_streams v:0 -show_entries stream=r_frame_rate -of csv=p=0 $FROM_SRC 2>$null
+    if ($ffprobe_out -match "(\d+)/(\d+)" -and [double]$Matches[2] -ne 0) {  # skip "N/0" rates, which would yield NaN/Infinity
+        $FPS = [double]$Matches[1] / [double]$Matches[2]
+    } elseif ($ffprobe_out -match "^\d+(\.\d+)?$") {
+        $FPS = [double]$ffprobe_out
+    }
+}
+Write-Host "Source framerate: $FPS fps"
+
+# Probe Size from first frame
+$SIZE_ARG = @()
+$FIRST_FRAME = Get-ChildItem "${FRAME_PREFIX}*.jpg" | Sort-Object Name | Select-Object -First 1
+if ($null -ne $FIRST_FRAME) {
+    $ffprobe_out = ffprobe -v error -select_streams v:0 -show_entries stream=width,height -of csv=p=0 $FIRST_FRAME.FullName 2>$null
+    if ($ffprobe_out -match "(\d+),(\d+)") {
+        $FW = [int]$Matches[1]
+        $FH = [int]$Matches[2]
+        # yuv420p requires even dimensions; round DOWN (an [int] cast of x/2 rounds half-to-even and can round up)
+        $FW = $FW - ($FW % 2)
+        $FH = $FH - ($FH % 2)
+        $SIZE_ARG = @("-s", "${FW}x${FH}")
+        Write-Host "Render size: ${FW}x${FH}"
+    }
+}
+
+# Check for audio: when the source has an audio stream, mux it in unchanged as ffmpeg input #1
+$AUDIO_ARGS = @()
+if (Test-Path $FROM_SRC) {
+    $audio_idx = ffprobe -v error -select_streams a:0 -show_entries stream=index -of csv=p=0 $FROM_SRC 2>$null
+    if (-not [string]::IsNullOrEmpty($audio_idx)) {
+        Write-Host "Copying audio from: $FROM_SRC"
+        $AUDIO_ARGS = @("-i", $FROM_SRC, "-map", "0:v", "-map", "1:a", "-c:a", "copy")
+    }
+}
+
+# Encode
+Write-Host "Encoding to $SAVE_OUTPUT ..."
+ffmpeg -framerate $FPS -i "${FRAME_PREFIX}%05d.jpg" @AUDIO_ARGS @SIZE_ARG -y -r $FPS -pix_fmt yuv420p -threads 8 $SAVE_OUTPUT +$FFMPEG_EXIT = $LASTEXITCODE + +# Cleanup +Remove-Item -Recurse -Force $TMP_DIR + +exit $FFMPEG_EXIT diff --git a/src/bvh_writer.h b/src/bvh_writer.h index 88947f3..b053d7b 100644 --- a/src/bvh_writer.h +++ b/src/bvh_writer.h @@ -16,11 +16,15 @@ #include #include +#include "export_macros.h" + +#include "fast_sam_3dbody_capi.h" + struct MHR_LBS_Data; struct BVH_MotionCapture; namespace fsb { struct MHRResult; } -class BVHWriter +class FSB_API BVHWriter { public: // Public because the static NAME_MAP table in bvh_writer.cpp tags each entry diff --git a/src/cli_common.h b/src/cli_common.h index 7b845cb..c878097 100644 --- a/src/cli_common.h +++ b/src/cli_common.h @@ -59,13 +59,17 @@ #include #include // resolve_backbone_defaults(): probe for backbone_fp16.onnx #include + +#include "export_macros.h" + +#ifndef _WIN32 #include // resolve_detector_defaults(): find libreyolo*.onnx in onnx_dir #include // ensure_trt_models(): readlink("/proc/self/exe") to locate setup_trt.sh +#endif -#include "fast_sam_3dbody.h" // for fsb::PipelineConfig - +#include "fast_sam_3dbody_capi.h" -struct CommonConfig +struct FSB_API CommonConfig { // ── Pipeline (model paths + ONNX runtime knobs) ────────────────────────── std::string onnx_dir = "./onnx"; @@ -259,6 +263,7 @@ inline void resolve_detector_defaults(CommonConfig& c) if (c.detector == "auto") { // Prefer a LibreYOLO model on disk when the user hasn't pinned --yolo. 
if (!c.yolo_path_set) { +#ifndef _WIN32 std::string pattern = c.onnx_dir + "/libreyolo*.onnx"; glob_t g{}; if (glob(pattern.c_str(), 0, nullptr, &g) == 0 && g.gl_pathc > 0) { @@ -268,6 +273,16 @@ inline void resolve_detector_defaults(CommonConfig& c) "preferring it over yolo-pose\n", c.yolo_path.c_str()); } globfree(&g); +#else + // Windows fallback: no globbing, just check for a common name + std::string libreyolo = c.onnx_dir + "/libreyolo.onnx"; + if (std::ifstream(libreyolo).good()) { + c.yolo_path = libreyolo; + std::fprintf(stderr, + "[cli] --detector auto: found LibreYOLO model '%s'; " + "preferring it over yolo-pose\n", c.yolo_path.c_str()); + } +#endif } c.detector = path_looks_like_libreyolo(c.yolo_path) ? "libreyolo" : "yolo-pose"; @@ -306,6 +321,7 @@ inline void ensure_trt_models(const CommonConfig& c) // Locate setup_trt.sh relative to this executable (binaries live in build/, // so ../tools/), with the working dir as a fallback. std::string script; +#ifndef _WIN32 { char buf[4096]; ssize_t n = ::readlink("/proc/self/exe", buf, sizeof(buf) - 1); @@ -321,6 +337,10 @@ inline void ensure_trt_models(const CommonConfig& c) if (script.empty() && std::ifstream("tools/setup_trt.sh").good()) script = "tools/setup_trt.sh"; } +#else + if (std::ifstream("tools/setup_trt.sh").good()) + script = "tools/setup_trt.sh"; +#endif if (script.empty()) { std::fprintf(stderr, "[cli] TRT: backbone_fp16_trt.onnx / decoder_fp16.onnx missing and " diff --git a/src/export_macros.h b/src/export_macros.h new file mode 100644 index 0000000..010d733 --- /dev/null +++ b/src/export_macros.h @@ -0,0 +1,11 @@ +#pragma once + +#ifdef _WIN32 + #ifdef fast_sam_3dbody_EXPORTS + #define FSB_API __declspec(dllexport) + #else + #define FSB_API __declspec(dllimport) + #endif +#else + #define FSB_API +#endif diff --git a/src/fast_sam_3dbody.cpp b/src/fast_sam_3dbody.cpp index 7a0a3ad..beeea3d 100644 --- a/src/fast_sam_3dbody.cpp +++ b/src/fast_sam_3dbody.cpp @@ -11,6 +11,13 @@ #define 
FSB_HAS_OPENCV_MAT 1 +#ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#endif + #include "fast_sam_3dbody.h" #include "preprocess.hpp" @@ -45,7 +52,9 @@ // ── LBS ────────────────────────────────────────────────────────────────────── #include "../GraphicsEngine/ModelLoader/model_loader_transform_joints.h" +#ifndef _WIN32 #include "mhr_lbs_cuda.cuh" +#endif // ── STL ────────────────────────────────────────────────────────────────────── #include @@ -241,6 +250,8 @@ struct OrtSession const EP ep = ladder[a]; const bool last = (a + 1 == ladder.size()); Ort::SessionOptions opts; + // Silence potential performance warnings about Memcpy nodes if level is high + opts.SetLogSeverityLevel(3); // 3 = Error, 4 = Fatal. Silences Warning(2). opts.SetIntraOpNumThreads(1); opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); try @@ -287,7 +298,17 @@ struct OrtSession } // EP_CPU: append nothing — the default CPU EP runs. +#ifdef _WIN32 + std::wstring wpath; + int len = MultiByteToWideChar(CP_UTF8, 0, path.c_str(), -1, NULL, 0); + if (len > 0) { + wpath.resize(len); + MultiByteToWideChar(CP_UTF8, 0, path.c_str(), -1, &wpath[0], len); + } + session = new Ort::Session(e, wpath.c_str(), opts); +#else session = new Ort::Session(e, path.c_str(), opts); +#endif if (ep == EP_CPU && cuda) fprintf(stderr, "[ORT] WARNING: '%s' running on CPU (GPU EPs unavailable)\n", path.c_str()); @@ -381,7 +402,9 @@ struct Pipeline::Impl // Native C LBS (body_model.lbs) — loaded when body_model.onnx is unavailable struct MHR_LBS_Data* lbs_data = nullptr; +#if defined(FSB_CUDA) && !defined(_WIN32) MHR_LBS_CUDACtx* lbs_cuda = nullptr; // GPU-accelerated path; null on CPU builds +#endif // ── per-stage timing accumulators ────────────────────────────────────────── // Wall time (ms) spent in each pipeline stage, summed across every @@ -478,8 +501,12 @@ struct Pipeline::Impl { printf("OK (%d joints, %d vertices)\n", lbs_data->n_joints, 
lbs_data->n_verts); #ifdef FSB_CUDA + #ifndef _WIN32 lbs_cuda = mhr_lbs_cuda_init(lbs_data); + #endif +#if defined(FSB_CUDA) && !defined(_WIN32) if (lbs_cuda) printf("[FSB] LBS CUDA accelerated (GPU shape blend + scatter)\n"); +#endif #endif // Load keypoint mapping even with LBS @@ -774,7 +801,6 @@ struct Pipeline::Impl // ── backbone ───────────────────────────────────────────────────────── t0 = Clock::now(); - const int FEAT_HW = CROP_SIZE / 16; // 32 const int BACKBONE_DIM = 1280; const size_t feat_elems = (size_t)B * BACKBONE_DIM * FEAT_HW * FEAT_HW; @@ -957,7 +983,7 @@ struct Pipeline::Impl float* joints_out = all_skel.data() + (size_t)i * 127 * 3; static const float zero_face[72] = {}; -#ifdef FSB_CUDA +#if defined(FSB_CUDA) && !defined(_WIN32) if (lbs_cuda) { mhr_lbs_cuda_compute(lbs_cuda, lbs_data, mp.data, raw_i + 266, @@ -1221,7 +1247,9 @@ struct Pipeline::Impl sess_decoder.free(); sess_body.free(); sess_yolo.free(); +#if defined(FSB_CUDA) && !defined(_WIN32) if (lbs_cuda) { mhr_lbs_cuda_free(lbs_cuda); lbs_cuda = nullptr; } +#endif if (lbs_data) { mhr_lbs_free(lbs_data); diff --git a/src/fast_sam_3dbody.h b/src/fast_sam_3dbody.h index fab3742..22da827 100644 --- a/src/fast_sam_3dbody.h +++ b/src/fast_sam_3dbody.h @@ -19,6 +19,8 @@ #include #include +#include "export_macros.h" + namespace fsb { // ─── Output per detected person ────────────────────────────────────────────── @@ -110,7 +112,7 @@ struct PipelineConfig { }; // ─── Pipeline class ─────────────────────────────────────────────────────────── -class Pipeline { +class FSB_API Pipeline { public: Pipeline(); ~Pipeline(); diff --git a/src/offline_passes.cpp b/src/offline_passes.cpp index fc0b7c7..01ee826 100644 --- a/src/offline_passes.cpp +++ b/src/offline_passes.cpp @@ -547,7 +547,7 @@ build_global_tracks(std::vector& frames, const Config& cfg) // Retire any track we haven't seen in too long. 
live.erase(std::remove_if(live.begin(), live.end(), - [F](const LiveTrack& t){ return (F - t.last_frame) > MAX_MISSING; }), + [F, MAX_MISSING](const LiveTrack& t){ return (F - t.last_frame) > MAX_MISSING; }), live.end()); } diff --git a/src/offline_passes.h b/src/offline_passes.h index 0c9b547..d152dde 100644 --- a/src/offline_passes.h +++ b/src/offline_passes.h @@ -22,11 +22,15 @@ #include #include +#include "export_macros.h" + #include // cv::VideoCapture #include "fast_sam_3dbody.h" // fsb::MHRResult, fsb::Pipeline #include "cli_common.h" // CommonConfig +#include "fast_sam_3dbody_capi.h" + namespace offline { @@ -36,7 +40,7 @@ namespace offline // at 18439×3 floats per detection it's the bulk of MHRResult's memory cost // and we never need the mesh for BVH export. This keeps memory bounded // even on long clips (~1 hour at 30 fps with 3 people ≈ 600 MB). -struct FrameRecord +struct FSB_API FrameRecord { int frame_idx = -1; std::vector detections; // pipeline output @@ -46,7 +50,7 @@ struct FrameRecord // Identity span across the session: a contiguous block of frames where the // same person is tracked, with the mapping back into each FrameRecord. -struct Track +struct FSB_API Track { int id = -1; int first_frame = INT32_MAX; @@ -64,7 +68,7 @@ struct Track // --bvh --bvh-template --no-bvh-*-shape-change --bvh-raw-fingers // --bw-cutoff --rot-clamp // Offline-specific knobs (smoothing / tracking / scene / gap / jitter) below. -struct Config : public CommonConfig +struct FSB_API Config : public CommonConfig { Config() { // Offline-specific default: --rot-clamp's geodesic SLERP clamp is @@ -152,7 +156,7 @@ struct Config : public CommonConfig // Each operates on the shared FrameRecord/Track buffers; call them in order. // PASS 1 — decode video + run MHR inference + detect scene cuts. 
-bool run_inference_pass(fsb::Pipeline& pipeline, +bool FSB_API run_inference_pass(fsb::Pipeline& pipeline, cv::VideoCapture& cap, std::vector& out_frames, std::vector& out_scene_cuts, @@ -160,36 +164,36 @@ bool run_inference_pass(fsb::Pipeline& pipeline, const Config& cfg); // PASS 2 — globally-optimal identity tracking across the whole clip. -std::vector build_global_tracks(std::vector& frames, +std::vector FSB_API build_global_tracks(std::vector& frames, const Config& cfg); // PASS 3 — fill missing frames inside each track (respects scene cuts). -void gap_interpolation_pass(std::vector& frames, +void FSB_API gap_interpolation_pass(std::vector& frames, std::vector& tracks, const std::vector& scene_cuts, const Config& cfg); // PASS 4 — replace high-velocity (jitter) frames by interpolation (opt-in). -void interpolate_jitter_pass(std::vector& frames, +void FSB_API interpolate_jitter_pass(std::vector& frames, const std::vector& tracks, const std::vector& scene_cuts, const Config& cfg); // PASS 5 — temporal smoothing per scene segment. -void smoothing_pass(std::vector& frames, +void FSB_API smoothing_pass(std::vector& frames, const std::vector& tracks, const std::vector& scene_cuts, float fs, const Config& cfg); // PASS 6 — write BVH file(s). -void export_to_bvh(const std::vector& frames, +void FSB_API export_to_bvh(const std::vector& frames, const std::vector& tracks, const std::vector& scene_cuts, double fps, const Config& cfg); // Linear/SLERP interpolation of two MHR results (used by Pass 3/4; exposed // because the multi-view refit reuses it — see MULTIVIEW_PLAN.md). -fsb::MHRResult interp_mhr(const fsb::MHRResult& a, +fsb::MHRResult FSB_API interp_mhr(const fsb::MHRResult& a, const fsb::MHRResult& b, float t);