Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions AmMatrix/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
# The supported CMake range is declared before project(), because
# cmake_minimum_required() establishes the policy settings project() runs under.
# Only the 3.5...4.3 range is kept: it overrode the earlier 2.8.7 request anyway,
# and CMake 4 refuses compatibility requests below 3.5.
cmake_minimum_required(VERSION 3.5...4.3)
project(AmMatrix)

# Build stripped (-s), optimised (-O3), position-independent (-fPIC) binaries tuned for the build machine's CPU (-march=native -mtune=native).
# These flags use GCC/Clang spelling, so they are applied only when the
# compiler is not MSVC. They replace (not extend) any previously set
# CMAKE_C_FLAGS / CMAKE_CXX_FLAGS value.
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "-s -O3 -fPIC -march=native -mtune=native")
  set(CMAKE_C_FLAGS "-s -O3 -fPIC -march=native -mtune=native")
endif()

# Opt-in switch, OFF by default. When enabled, -DINTEL_OPTIMIZATIONS is added
# to the compile line of the sources built from this directory.
# option() takes <variable> "<help text>" [value]; without the help string the
# word OFF was being stored as the description rather than as the default.
option(INTEL_OPTIMIZATIONS "Compile with -DINTEL_OPTIMIZATIONS" OFF)

if(INTEL_OPTIMIZATIONS)
  add_definitions(-DINTEL_OPTIMIZATIONS)
endif()

# MSVC builds get _USE_MATH_DEFINES defined on the compile line.
if(MSVC)
  add_definitions("-D_USE_MATH_DEFINES")
endif()

add_library(
AmMatrix STATIC
collisions.c
Expand All @@ -22,21 +28,26 @@ add_library(
matrixMultiplicationOptimization.c
matrixMultiplicationOptimization.h
matrixOpenGL.c
matrixOpenGL.h
matrixOpenGL.h
ocvStaging.c
ocvStaging.h
solveHomography.c
solveLinearSystemGJ.c
solveLinearSystemGJ.c
simpleRenderer.c
solids.c
)
# The m and rt libraries are linked on every platform except Windows.
# The link request must live only inside this guard; an unguarded copy would
# ask for the same libraries on Windows as well.
if(NOT WIN32)
  target_link_libraries(AmMatrix m rt)
endif()

# Pull in the CMakeLists.txt of the TestCPUOptimizedInstructionSet directory.
# A source directory may be added only once: a repeated add_subdirectory()
# for the same directory fails at configure time.
add_subdirectory(TestCPUOptimizedInstructionSet/)

# Debug-configuration artifacts of AmMatrix get a "D" suffix, and the archive,
# library and runtime outputs are all written into this source directory.
set_target_properties(AmMatrix PROPERTIES
DEBUG_POSTFIX "D"
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
)
# ---------------------------------------------------------------------------
# Remove directory overrides to let CMake manage output paths automatically.
# ---------------------------------------------------------------------------
# set_target_properties(AmMatrix PROPERTIES
# DEBUG_POSTFIX "D"
# ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
# LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
# RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
# )
8 changes: 4 additions & 4 deletions AmMatrix/OpenCL/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# The supported CMake range is declared before project(), because
# cmake_minimum_required() establishes the policy settings project() runs under.
# Only the 3.5...4.3 range is kept: it overrode the earlier 2.8.7 request anyway,
# and CMake 4 refuses compatibility requests below 3.5.
cmake_minimum_required(VERSION 3.5...4.3)
project(matmul)

# Build stripped (-s), optimised (-O3), position-independent (-fPIC) binaries tuned for the build machine's CPU (-march=native -mtune=native).
# C and C++ are compiled with one and the same flag string.
set(CMAKE_C_FLAGS "-s -O3 -fPIC -march=native -mtune=native")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}")

# Opt-in switch, OFF by default. option() takes <variable> "<help text>" [value];
# without the help string the word OFF was being stored as the description.
option(OPENCL_OPTIMIZATIONS "Enable the OPENCL_OPTIMIZATIONS build switch" OFF)

Expand All @@ -13,7 +13,7 @@ endif(OPENCL_OPTIMIZATIONS)

set_source_files_properties(mat_mul.cl PROPERTIES HEADER_FILE_ONLY TRUE)
#add_library(matmul STATIC mat_mul.c)
# matmul is an executable built from mat_mul.c and linked against m, rt and
# OpenCL. The target is declared exactly once: a second add_executable() with
# the same name is a configure-time error.
add_executable(matmul mat_mul.c)
target_link_libraries(matmul m rt OpenCL)


Expand Down
18 changes: 11 additions & 7 deletions AmMatrix/TestCPUOptimizedInstructionSet/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
# The supported CMake range is declared before a single project() call, because
# cmake_minimum_required() establishes the policy settings project() runs under.
# Only the 3.5...4.3 range is kept: CMake 4 refuses compatibility requests
# below 3.5, so the 2.8.7 request is dropped.
cmake_minimum_required(VERSION 3.5...4.3)
project(TestCPUOptimizedInstructionSet)

# OpenCV is mandatory (REQUIRED aborts configuration when it is not found).
# Its include directories are added at directory scope.
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})


# The test executable is built from main.c plus ../matrix4x4Tools.c.
# The target is declared exactly once: a second add_executable() with the same
# name is a configure-time error.
add_executable(TestCPUOptimizedInstructionSet main.c ../matrix4x4Tools.c)

# rt, dl and m are requested only on UNIX; other platforms link AmMatrix alone.
if(UNIX)
  target_link_libraries(TestCPUOptimizedInstructionSet rt dl m AmMatrix)
else()
  target_link_libraries(TestCPUOptimizedInstructionSet AmMatrix)
endif()
# Debug builds of the executable carry a "D" suffix.
set_property(TARGET TestCPUOptimizedInstructionSet PROPERTY DEBUG_POSTFIX "D")

# AmMatrix is built before this executable.
add_dependencies(TestCPUOptimizedInstructionSet AmMatrix)

set_target_properties(TestCPUOptimizedInstructionSet PROPERTIES
set_target_properties(TestCPUOptimizedInstructionSet PROPERTIES
DEBUG_POSTFIX "D"
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
Expand Down
40 changes: 28 additions & 12 deletions AmMatrix/TestCPUOptimizedInstructionSet/main.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
#include <stdio.h>
#include <stdlib.h>
#ifdef _WIN32
#include <windows.h>
#include <stdint.h>
/* Windows-only definition of struct timespec, filled in by the clock_gettime()
   replacement defined in this same _WIN32 section. */
struct timespec {
long tv_sec;  /* whole seconds */
long tv_nsec; /* nanoseconds past tv_sec (stored as a value modulo 1000000000) */
};
#define CLOCK_MONOTONIC 0
/*
 * Windows replacement for POSIX clock_gettime().
 *
 * ignore : clock id; not inspected, every id is served by the same counter.
 * tv     : receives the current counter reading, split into whole seconds and
 *          the nanosecond remainder.
 *
 * Returns 0 on success, -1 when tv is NULL or the performance counter cannot
 * be read.
 *
 * The reading is taken from QueryPerformanceCounter() rather than
 * GetTickCount64(): the tick count is expressed in milliseconds, which is too
 * coarse for the microsecond-level timings this program prints.
 * NOTE(review): the zero point of the counter differs from GetTickCount64();
 * callers are presumed to use only differences between readings - confirm.
 */
static int clock_gettime(int ignore, struct timespec* tv) {
    LARGE_INTEGER frequency;
    LARGE_INTEGER counter;
    uint64_t ticksPerSecond;
    uint64_t ticks;

    (void)ignore;

    if (tv == NULL) {
        return -1;
    }
    if (!QueryPerformanceFrequency(&frequency) || frequency.QuadPart <= 0) {
        return -1;
    }
    if (!QueryPerformanceCounter(&counter)) {
        return -1;
    }

    ticksPerSecond = (uint64_t)frequency.QuadPart;
    ticks = (uint64_t)counter.QuadPart;

    tv->tv_sec = (long)(ticks / ticksPerSecond);
    /* Scale only the sub-second remainder so the multiplication by 1e9 cannot
       overflow 64 bits for any realistic counter frequency. */
    tv->tv_nsec = (long)(((ticks % ticksPerSecond) * 1000000000ULL) / ticksPerSecond);
    return 0;
}
#else
#include <time.h>
#endif

#include "../matrix4x4Tools.h"

Expand Down Expand Up @@ -33,28 +49,28 @@ int main()
exit(0);
}

int i=0;
int i=0;

struct Matrix4x4OfFloats testResultOptimized={0};
struct Matrix4x4OfFloats testResultUnoptimized={0};
struct Matrix4x4OfFloats matrixA={0};
struct Matrix4x4OfFloats matrixB={0};

//Set matrices to identity
matrixA.m[0]=1.0; matrixA.m[5]=1.0; matrixA.m[10]=1.0; matrixA.m[15]=1.0;
matrixB.m[0]=1.0; matrixB.m[5]=1.0; matrixB.m[10]=1.0; matrixB.m[15]=1.0;
matrixA.m[0]=1.0; matrixA.m[5]=1.0; matrixA.m[10]=1.0; matrixA.m[15]=1.0;
matrixB.m[0]=1.0; matrixB.m[5]=1.0; matrixB.m[10]=1.0; matrixB.m[15]=1.0;

unsigned int numberOfSamples = 100000;
unsigned long unoptimizedTime = 0;
unsigned long optimizedTime = 0;

unsigned int errors = 0;
for (i=0; i<numberOfSamples; i++)
{
float tmp = rand()%1000 / 100;
matrixA.m[1] = tmp;
matrixB.m[1] = tmp;

unsigned long startUnoptimized = GetTickCountMicrosecondsMN();
//multiplyTwo4x4FMatrices_Naive(testResultUnoptimized.m,matrixA.m,matrixB.m);
unsigned long endUnoptimized = GetTickCountMicrosecondsMN();
Expand All @@ -65,23 +81,23 @@ int main()
multiplyTwoRaw4x4FMatricesS(testResultOptimized.m,matrixA.m,matrixB.m);
#endif
unsigned long endOptimized = GetTickCountMicrosecondsMN();
optimizedTime+=endOptimized - startOptimized;
optimizedTime+=endOptimized - startOptimized;

if (matrixA.m[1]!=matrixB.m[1])
{
++errors;
}
}

if (errors>0)
{
fprintf(stderr,"%u errors encountered..\n",errors);
}

print4x4FMatrix("Unoptimized Result",testResultUnoptimized.m,1);
print4x4FMatrix("Optimized Result",testResultOptimized.m,1);


printf("Finished with %u samples !\n",numberOfSamples);
printf("%0.4f microseconds unoptimized!\n",(float) unoptimizedTime/numberOfSamples);
printf("%0.4f microseconds optimized!\n",(float) optimizedTime/numberOfSamples);
Expand Down
14 changes: 14 additions & 0 deletions AmMatrix/matrix4x4Tools.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,24 @@ enum mat4x4EItem
12 13 14 15
*/

/* 4x4 identity matrix stored as 16 floats with 16-byte alignment.
   Only the alignment specifier depends on the compiler; the initializer that
   follows the preprocessor selection is shared by all three spellings. */
#if defined(_MSC_VER) && defined(__cplusplus)
alignas(16) const float identityMatrix4x4[16]=
#elif defined(_MSC_VER)
__declspec(align(16)) const float identityMatrix4x4[16]=
#else
const float __attribute__((aligned(16))) identityMatrix4x4[16]=
#endif
{
    1.0,0.0,0.0,0.0,
    0.0,1.0,0.0,0.0,
    0.0,0.0,1.0,0.0,
    0.0,0.0,0.0,1.0
};

void print4x4FMatrix(const char * str , float * matrix4x4,int forcePrint)
{
Expand Down
16 changes: 16 additions & 0 deletions AmMatrix/matrix4x4Tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,15 @@ struct Matrix4x4OfFloats
I31 , I32 , I33 , I34 ,
I41 , I42 , I43 , I44
*/
/* 16 floats with 16-byte alignment; only the alignment spelling depends on the compiler. */
#if defined(_MSC_VER) && defined(__cplusplus)
alignas(16) float m[16];
#elif defined(_MSC_VER)
__declspec(align(16)) float m[16];
#else
float __attribute__((aligned(16))) m[16];
#endif
};


Expand All @@ -79,7 +87,15 @@ struct Vector4x1OfFloats
IRC => Item Row/Column =>
I11, I12, I13, I14
*/
/* 4 floats with 16-byte alignment; only the alignment spelling depends on the compiler. */
#if defined(_MSC_VER) && defined(__cplusplus)
alignas(16) float m[4];
#elif defined(_MSC_VER)
__declspec(align(16)) float m[4];
#else
float __attribute__((aligned(16))) m[4];
#endif
};


Expand Down
Loading