摘要: openblas主要用于矩阵运算的加速
下载openblas,这里用0.3.7版本
https://github.com/xianyi/OpenBLAS
编译
前提:需要先准备 aarch64 交叉编译工具链(gcc-linaro 6.3.1)
工具链下载地址如下:
https://releases.linaro.org/components/toolchain/binaries/6.3-2017.05/aarch64-linux-gnu/
将下载的工具链拷贝到编译服务器,把海思的交叉编译环境切换为该开源工具链,执行如下命令:
tar xf gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu.tar.xz
vi .profile
PATH="$HOME/bin:$PATH:$HOME/bin/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin"
source .profile
tar -zxvf OpenBLAS-0.3.7.tar.gz
cd OpenBLAS-0.3.7
make BINARY=64 CC=aarch64-linux-gnu-gcc NOFORTRAN=1 HOSTCC=gcc TARGET=ARMV8
make PREFIX=../openblas_install install
.
├── bin
├── include
│ ├── cblas.h
│ ├── f77blas.h
│ ├── lapacke_config.h
│ ├── lapacke.h
│ ├── lapacke_mangling.h
│ ├── lapacke_utils.h
│ └── openblas_config.h
└── lib
├── cmake
│ └── openblas
│ ├── OpenBLASConfig.cmake
│ └── OpenBLASConfigVersion.cmake
├── libopenblas.a -> libopenblas_armv8p-r0.3.7.a
├── libopenblas_armv8p-r0.3.7.a
├── libopenblas_armv8p-r0.3.7.so
├── libopenblas.so -> libopenblas_armv8p-r0.3.7.so
├── libopenblas.so.0 -> libopenblas_armv8p-r0.3.7.so
└── pkgconfig
└── openblas.pc
code
#include <cblas.h>
#include <stdio.h>
/*
 * Demo of cblas_dgemm: computes C = alpha * A * B + beta * C for a
 * 3x2 matrix A and a 2x3 matrix B, then prints the 3x3 result C on one
 * line in row-major order.
 *
 * Returns 0 on completion so that shells and scripts see a successful run.
 */
int main() {
    int i = 0;

    /* A is M x K (3 x 2), B is K x N (2 x 3); both are stored row-major. */
    double A[6] = {1.0, 3.0, 1.0, -3.0, 4.0, -1.0};
    double B[6] = {1.0, 4.0, 1.0, -3.0, 4.0, -1.0};
    /* C is M x N (3 x 3). beta is 0.0 below, so these initial values are
     * scaled by zero and do not contribute to the printed result. */
    double C[9] = {.5, .5, .5, 1.5, .5, 2.5, .5, .5, .5};

    int M = 3; /* rows of A and C */
    int N = 3; /* columns of B and C */
    int K = 2; /* columns of A and rows of B */

    double alpha = 1.0;
    double beta = 0.0;

    /* CblasRowMajor: matrices are laid out row by row.
     * CblasNoTrans:  neither A nor B is transposed.
     * For row-major storage the leading dimension is the column count,
     * hence lda = K, ldb = N, ldc = N. */
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                M, N, K, alpha, A, K, B, N, beta, C, N);

    /* Print all M * N entries of the product. */
    for (i = 0; i < M * N; i++) {
        printf("%lf ", C[i]);
    }
    printf("\n");

    return 0;
}
编译用的 CMakeLists.txt
# Build script for the OpenBLAS demo executable `main`.
#
# Configure with, for example:
#   cmake -DOPENBLAS_INSTALL_DIR=/path/to/openblas_install ..
# OPENBLAS_INSTALL_DIR is the PREFIX that was passed to OpenBLAS's
# `make install`; it must contain include/ and lib/.
cmake_minimum_required(VERSION 3.5...3.28)
project(TEST VERSION 0.1 LANGUAGES CXX)

# Default to a Debug build, but only when the user did not choose a build
# type and the generator is single-config. A -DCMAKE_BUILD_TYPE=... given on
# the command line is respected.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type (Debug, Release, ...)" FORCE)
endif()

# Location of the cross-compiled OpenBLAS install tree. The default keeps
# the path this project was originally written against.
set(OPENBLAS_INSTALL_DIR "/home/yangna/chenjun/HISI3559a/openblas_install"
    CACHE PATH "OpenBLAS install prefix (contains include/ and lib/)")

# Library file. HINTS are searched before the host's system directories, so
# a host-architecture libopenblas is not picked up ahead of the one in
# OPENBLAS_INSTALL_DIR.
find_library(Openblas_LIBS
  NAMES openblas
  HINTS "${OPENBLAS_INSTALL_DIR}/lib"
)
if(NOT Openblas_LIBS)
  message(FATAL_ERROR
    "libopenblas not found; set -DOPENBLAS_INSTALL_DIR=<prefix> "
    "(searched ${OPENBLAS_INSTALL_DIR}/lib)")
endif()

# Provides the Threads::Threads imported target used below.
find_package(Threads REQUIRED)

# Every source file in the project root is compiled into the executable.
aux_source_directory("${PROJECT_SOURCE_DIR}" DIR_SRC)
add_executable(main ${DIR_SRC})

# Header files (cblas.h and friends).
target_include_directories(main PRIVATE "${OPENBLAS_INSTALL_DIR}/include")

# Warning, optimisation and debug flags, selected per build configuration.
target_compile_options(main PRIVATE
  -Wall
  $<$<CONFIG:Debug>:-O0>
  $<$<CONFIG:Debug>:-g>
  $<$<CONFIG:Debug>:-ggdb>
  $<$<CONFIG:Release>:-O3>
)
target_compile_definitions(main PRIVATE $<$<CONFIG:Debug>:DEBUG>)

# OpenBLAS first, then the thread and math libraries it depends on; both of
# the latter are required for the link to succeed.
target_link_libraries(main PRIVATE
  "${Openblas_LIBS}"
  Threads::Threads
  m
)
注意:链接时必须带上 -lm 和 -lpthread 这两项(数学库和线程库)。
mkdir build && cd build
cmake ..
make
mkdir build && cd build
cmake -DCMAKE_CXX_COMPILER=/home/yangna/Atlas500_DDK/toolchains/Euler_compile_env_cross/arm/cross_compile/install/bin/aarch64-linux-gnu-g++ ..
make
输出
Scanning dependencies of target main
[ 50%] Building CXX object CMakeFiles/main.dir/main.cpp.o
[100%] Linking CXX executable main
[100%] Built target main
上面的方法需要在命令行中输入很长的编译器路径,而且不能复用。可以改用 CMake 的 CMAKE_TOOLCHAIN_FILE 变量来指定工具链文件
新建一个hisi3559.cmake文件(一般应用开发文档都会给出的),写入:
# CMake toolchain file: cross-compile for 64-bit ARM (aarch64) Linux with the
# aarch64-linux-gnu GCC shipped in the Euler cross-compile environment.
#
# Usage:
#   cmake -DCMAKE_TOOLCHAIN_FILE=<path to this file> <source dir>

# Root of the cross-compile environment. The EULER_CROSS_PATH environment
# variable, when set and non-empty, overrides the built-in default so the
# file can be reused on other machines.
if(NOT "$ENV{EULER_CROSS_PATH}" STREQUAL "")
  set(EULER_CROSS_PATH "$ENV{EULER_CROSS_PATH}")
else()
  set(EULER_CROSS_PATH /home/yangna/Atlas500_DDK/toolchains/Euler_compile_env_cross)
endif()

# Target platform. The processor must match the aarch64-* tools selected
# below; projects that inspect CMAKE_SYSTEM_PROCESSOR would otherwise pick
# 32-bit ARM settings.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)

# Install prefix of the cross toolchain (no trailing slash, so the tool
# paths below do not contain "//").
set(tools "${EULER_CROSS_PATH}/arm/cross_compile/install")

# No sysroot is configured. If the toolchain provides one, it can be enabled
# with: set(CMAKE_SYSROOT ${tools}/sysroot)

# Cross compilers and binutils.
set(CMAKE_C_COMPILER   "${tools}/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "${tools}/bin/aarch64-linux-gnu-g++")
set(CMAKE_AR           "${tools}/bin/aarch64-linux-gnu-ar")
set(CMAKE_RANLIB       "${tools}/bin/aarch64-linux-gnu-ranlib")

# find_program() never looks in the target roots (programs run on the build
# host); find_library(), find_path()/find_file() and find_package() look
# only in the target roots.
# NOTE(review): this file sets neither CMAKE_FIND_ROOT_PATH nor
# CMAKE_SYSROOT - confirm the ONLY modes behave as intended without them.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../hisi3559.cmake ..
make
vi /etc/profile
# for openblas lib , 2020-08-04
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/1_profile/openblas_lib
source /etc/profile
执行main
./main
结果
-8.000000 16.000000 -2.000000 10.000000 -8.000000 4.000000 7.000000 12.000000 5.000000