参考1:Linux下CMake简明教程
参考2:Ne10库编译使用
参考3:Arm Neon
参考4:Ne10官方网站
参考5:NE10 github地址
参考6:NE10 gitee地址
下载地址:Project Ne10: An Open Optimized Software Library Project for the Arm Architecture @ GitHub
版本:projectNe10-Ne10-v1.2.1-72-g1f059a7.zip
海思 3516 的 CPU 为 ARMv7 架构,可以用 cat /proc/cpuinfo 查看(Features 一栏中包含 neon):
~ # cat /proc/cpuinfo
processor : 0
model name : ARMv7 Processor rev 5 (v7l)
BogoMIPS : 100.00
Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x0
CPU part : 0xc07
CPU revision : 5
processor : 1
model name : ARMv7 Processor rev 5 (v7l)
BogoMIPS : 100.00
Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x0
CPU part : 0xc07
CPU revision : 5
Hardware : Generic DT based system
Revision : 0000
Serial : 0000000000000000
mike@ubuntu:/home/linux_test/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build$ cat /etc/profile
# /etc/profile: system-wide .profile file for the Bourne shell (sh(1))
# and Bourne compatible shells (bash(1), ksh(1), ash(1), ...).
if [ "$PS1" ]; then
if [ "$BASH" ] && [ "$BASH" != "/bin/sh" ]; then
# The file bash.bashrc already sets the default PS1.
# PS1='\h:\w\$ '
if [ -f /etc/bash.bashrc ]; then
. /etc/bash.bashrc
fi
else
if [ "`id -u`" -eq 0 ]; then
PS1='# '
else
PS1='$ '
fi
fi
fi
if [ -d /etc/profile.d ]; then
for i in /etc/profile.d/*.sh; do
if [ -r $i ]; then
. $i
fi
done
unset i
fi
# Tue Jun 16 06:26:59 PDT 2020
# HuaWei LiteOS Linux, Cross-Toolchain PATH
export PATH="/opt/hisi-linux/x86-arm/arm-himix200-linux/bin:$PATH"
#
export NE10_LINUX_TARGET_ARCH=armv7
a, 修改 GNUlinux_config.cmake:把 armv7 分支里的 arm-linux-gnueabihf-* 工具链注释掉,换成 arm-himix200-linux-* 工具链
if(NE10_LINUX_TARGET_ARCH STREQUAL "armv7")
# set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
# set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
# set(CMAKE_ASM_COMPILER arm-linux-gnueabihf-as)
# find_program(CMAKE_AR NAMES "arm-linux-gnueabihf-ar")
# find_program(CMAKE_RANLIB NAMES "arm-linux-gnueabihf-ranlib")
set(CMAKE_C_COMPILER arm-himix200-linux-gcc)
set(CMAKE_CXX_COMPILER arm-himix200-linux-g++)
set(CMAKE_ASM_COMPILER arm-himix200-linux-as)
find_program(CMAKE_AR NAMES "arm-himix200-linux-ar")
find_program(CMAKE_RANLIB NAMES "arm-himix200-linux-ranlib")
b, 修改 CMakeLists.txt:把 armv7 的编译选项 -mfpu=vfp3 改为 -mfpu=neon(GNU/Linux 分支同时加上 -mfloat-abi=softfp)
# Add the compiler flags needed for armv7. AArch64 does not need such flags.
# The variable is quoted so the comparison stays well-formed even when
# NE10_TARGET_ARCH is empty or unset.
if("${NE10_TARGET_ARCH}" STREQUAL "armv7")
    # Previous setting, kept for reference (it selected -mfpu=vfp3):
    # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mthumb -march=armv7-a -mfloat-abi=${FLOAT_ABI} -mfpu=vfp3")
    # Select the NEON FPU instead. The space after ${CMAKE_C_FLAGS} keeps the
    # appended options from fusing with whatever flags are already present.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mthumb -march=armv7-a -mfloat-abi=${FLOAT_ABI}")
    if(NE10_ARM_HARD_FLOAT)
        # "--no-warn-mismatch" is needed so the linker suppresses the error about
        # not all functions using VFP registers to pass arguments, e.g.
        #   .../arm-linux-androideabi/bin/ld: error: ..../test-float.o
        #   uses VFP register arguments, output does not
        # There is a calling-convention mismatch between the NDK's crt*.o and
        # ne10's object files: crt*.o still uses softfp while ne10's object
        # files use hard floating point.
        # Refer to $NDK/tests/device/hard-float/jni/Android.mk for more details.
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,--no-warn-mismatch")
    endif()
    # Turn on asm optimization for Android on ARM v7.
    set(NE10_ASM_OPTIMIZATION on)
endif()
message("-- Loaded toolchain:
${CMAKE_C_COMPILER}
${CMAKE_CXX_COMPILER}
${CMAKE_ASM_COMPILER}")
message("-- CMAKE_C_FLAGS:
${CMAKE_C_FLAGS}")
elseif(GNULINUX_PLATFORM)
# GNU/Linux build: add the armv7 compiler and assembler flags.
if("${NE10_TARGET_ARCH}" STREQUAL "armv7")
    # Previous setting, kept for reference (it selected -mfpu=vfp3):
    # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mthumb -march=armv7-a -mfpu=vfp3 -funsafe-math-optimizations")
    # Select the NEON FPU with the softfp float ABI. The space after
    # ${CMAKE_C_FLAGS} keeps the appended options from fusing with whatever
    # flags are already present.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mthumb -march=armv7-a -funsafe-math-optimizations -mfloat-abi=softfp")
    # The assembler flags start from the C flags set above and repeat the
    # architecture/FPU/ABI options explicitly.
    set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -mthumb -march=armv7-a -mfpu=neon -mfloat-abi=softfp")
    # Turn on asm optimization for Linux on ARM v7.
    set(NE10_ASM_OPTIMIZATION on)
endif()
参考:/doc/building.md
```bash
cd $NE10_PATH
mkdir build && cd build
export NE10_LINUX_TARGET_ARCH=armv7 # Can also be "aarch64"
cmake -DCMAKE_TOOLCHAIN_FILE=../GNUlinux_config.cmake ..
make
```
mike@ubuntu:/home/linux_test/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build$ cmake -DCMAKE_TOOLCHAIN_FILE=../GNUlinux_config.cmake ..
-- Target architecture: armv7
-- Building type: RELEASE
-- Configuring done
-- Generating done
mike@ubuntu:/home/linux_test/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build$ make
[ 92%] Built target NE10
[100%] Built target NE10_test_static
/mnt/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build/samples # ./NE10_test_static
==== Ne10 Samples ===
# Introduction
test_intro[test_add_dynamic]:
ne10_addc_float: 4.200939 + 0.987757 = 5.188695
ne10_addc_float: 1.971915 + 0.987757 = 2.959671
ne10_addc_float: 3.915496 + 0.987757 = 4.903253
ne10_addc_float: 3.992200 + 0.987757 = 4.979957
ne10_addc_float: 4.558237 + 0.987757 = 5.545994
test_intro[test_add_static]:
ne10_addc_float_c: 1.676114 + 3.144355 = 4.820468
ne10_addc_float_neon: 1.676114 + 3.144355 = 4.820468
ne10_addc_float_c: 3.841148 + 3.144355 = 6.985502
ne10_addc_float_neon: 3.841148 + 3.144355 = 6.985502
ne10_addc_float_c: 1.388874 + 3.144355 = 4.533228
ne10_addc_float_neon: 1.388874 + 3.144355 = 4.533228
ne10_addc_float_c: 2.769850 + 3.144355 = 5.914205
ne10_addc_float_neon: 2.769850 + 3.144355 = 5.914205
ne10_addc_float_c: 2.386985 + 3.144355 = 5.531340
ne10_addc_float_neon: 2.386985 + 3.144355 = 5.531340
# Matrix Multiply
[ 1.82 4.58 0.71 [ 1.21 0.78 0.54 [ 8.21 11.07 24.65
2.57 3.18 3.03 * 0.69 2.00 4.99 = 17.50 10.35 20.58
4.76 3.59 0.08 ] 4.02 0.65 1.09 ] 8.57 10.97 20.59 ]
[ 2.56 1.48 2.47 [ 3.86 2.00 1.76 [ 23.29 15.23 21.84
4.20 3.19 4.86 * 2.63 4.46 4.04 = 43.30 29.50 42.62
3.06 2.62 1.46 ] 3.85 1.42 4.60 ] 24.35 19.89 22.71 ]
[ 0.35 0.43 4.45 [ 0.10 1.19 4.25 [ 2.42 22.58 14.07
4.75 0.96 1.74 * 2.29 4.85 1.33 = 3.22 18.19 26.18
2.63 3.32 0.32 ] 0.32 4.51 2.70 ] 7.95 20.67 16.48 ]
# Complex-to-Complex FFT
IN[ 0]: 18.7603 + 38.0124i OUT[ 0]: 435.0166 + 517.7552i
IN[ 1]: 25.6268 + 33.3862i OUT[ 1]: -62.7354 + 33.7247i
IN[ 2]: 26.5803 + 1.9640i OUT[ 2]: -44.8078 + -14.8473i
IN[ 3]: 21.8819 + 46.5918i OUT[ 3]: -113.4578 + -17.2272i
IN[ 4]: 46.5405 + 36.0476i OUT[ 4]: 42.9251 + 16.5104i
IN[ 5]: 14.2147 + 36.9267i OUT[ 5]: 66.1855 + 16.9207i
IN[ 6]: 31.9989 + 17.7024i OUT[ 6]: -59.6584 + 40.7193i
IN[ 7]: 34.3931 + 8.2987i OUT[ 7]: 63.1073 + -38.3605i
IN[ 8]: 22.0052 + 44.0038i OUT[ 8]: 19.2536 + -1.6965i
IN[ 9]: 41.4601 + 16.5169i OUT[ 9]: -23.9881 + -98.8069i
IN[10]: 11.4484 + 44.6686i OUT[10]: -96.9254 + 79.8168i
IN[11]: 17.5180 + 34.3335i OUT[11]: -14.1841 + -12.7989i
IN[12]: 47.8234 + 29.4320i OUT[12]: 43.3225 + 57.4142i
IN[13]: 32.8652 + 42.9338i OUT[13]: 34.0209 + 29.3279i
IN[14]: 21.9780 + 46.1985i OUT[14]: -13.0017 + -39.5426i
IN[15]: 19.9218 + 40.7383i OUT[15]: 25.0927 + 39.2896i
# Real-to-Complex FFT
IN[ 0]: 34.2109 OUT[ 0]: 446.6450 + 0.0000i
IN[ 1]: 45.5486 OUT[ 1]: -13.2045 + -39.7665i
IN[ 2]: 24.1245 OUT[ 2]: 5.1069 + -7.8707i
IN[ 3]: 10.7912 OUT[ 3]: 24.3686 + -18.2637i
IN[ 4]: 47.5126 OUT[ 4]: 68.2387 + -48.3229i
IN[ 5]: 46.0064 OUT[ 5]: -31.5288 + -65.5524i
IN[ 6]: 7.3830 OUT[ 6]: -46.2024 + 42.8686i
IN[ 7]: 44.0531 OUT[ 7]: 28.9923 + -54.2053i
IN[ 8]: 32.0540 OUT[ 8]: 29.1885 + 0.0000i
IN[ 9]: 21.5977
IN[10]: 30.9798
IN[11]: 14.0530
IN[12]: 39.3001
IN[13]: 15.3729
IN[14]: 22.3517
IN[15]: 11.3053
# FIR
Coefficients:
b[0] = 2.0825
b[1] = 2.7822
b[2] = 1.3812
b[3] = 0.9377
IN[ 0]: 3.3921 OUT[ 0]: 7.0642
IN[ 1]: 18.1361 OUT[ 1]: 47.2062
IN[ 2]: 2.0634 OUT[ 2]: 59.4408
IN[ 3]: 2.5215 OUT[ 3]: 39.2217
IN[ 4]: 9.9089 OUT[ 4]: 47.5062
IN[ 5]: 15.2095 OUT[ 5]: 64.6600
IN[ 6]: 19.6950 OUT[ 6]: 99.3814
IN[ 7]: 18.7001 OUT[ 7]: 124.0371
IN[ 8]: 13.6889 OUT[ 8]: 121.9986
IN[ 9]: 7.6638 OUT[ 9]: 98.3408
IN[10]: 14.9954 OUT[10]: 88.9915
IN[11]: 7.3733 OUT[11]: 80.4960
IN[12]: 5.8832 OUT[12]: 60.6632
IN[13]: 4.6452 OUT[13]: 50.2865
IN[14]: 11.6898 OUT[14]: 52.3075
IN[15]: 4.8883 OUT[15]: 54.6357