当前位置: 首页 > 工具软件 > Ne10 > 使用案例 >

ARM NEON学习笔记四~编译Ne10




参考3:Arm Neon


参考5:NE10 github地址

参考6:NE10 gitee地址

参考7:Ne10 加速ARM平台图像处理应用


下载地址:Project Ne10: An Open Optimized Software Library Project for the Arm Architecture @ GitHub



1, 配置目标平台

        海思 Hi3516 的 CPU 为 ARMv7 架构,可以在板端执行 cat /proc/cpuinfo 查看(输出的 Features 一栏中包含 neon,说明该 CPU 支持 NEON 指令集):

~ # cat /proc/cpuinfo
processor       : 0
model name      : ARMv7 Processor rev 5 (v7l)
BogoMIPS        : 100.00
Features        : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm 
CPU implementer : 0x41
CPU architecture: 7
CPU variant     : 0x0
CPU part        : 0xc07
CPU revision    : 5

processor       : 1
model name      : ARMv7 Processor rev 5 (v7l)
BogoMIPS        : 100.00
Features        : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm 
CPU implementer : 0x41
CPU architecture: 7
CPU variant     : 0x0
CPU part        : 0xc07
CPU revision    : 5

Hardware        : Generic DT based system
Revision        : 0000
Serial          : 0000000000000000


mike@ubuntu:/home/linux_test/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build$ cat /etc/profile
# /etc/profile: system-wide .profile file for the Bourne shell (sh(1))
# and Bourne compatible shells (bash(1), ksh(1), ash(1), ...).

# Interactive shells only: PS1 is set when the shell is interactive.
if [ "$PS1" ]; then
  if [ "$BASH" ] && [ "$BASH" != "/bin/sh" ]; then
    # The file bash.bashrc already sets the default PS1.
    # PS1='\h:\w\$ '
    if [ -f /etc/bash.bashrc ]; then
      . /etc/bash.bashrc
    fi
  else
    # Non-bash shells: '#' prompt for root (uid 0), '$' for everyone else.
    if [ "`id -u`" -eq 0 ]; then
      PS1='# '
    else
      PS1='$ '
    fi
  fi
fi

# Source every readable drop-in script under /etc/profile.d.
if [ -d /etc/profile.d ]; then
  for i in /etc/profile.d/*.sh; do
    if [ -r $i ]; then
      . $i
    fi
  done
  unset i
fi

# Tue Jun 16 06:26:59 PDT 2020
# HuaWei LiteOS Linux, Cross-Toolchain PATH
# Prepend the arm-himix200-linux cross-toolchain bin directory to PATH so the
# arm-himix200-linux-* tools can be found by name.
export PATH="/opt/hisi-linux/x86-arm/arm-himix200-linux/bin:$PATH" 

# Target architecture for the Ne10 GNU/Linux build; the build steps later in
# this document note that it can also be "aarch64".
# NOTE(review): presumably read by GNUlinux_config.cmake - confirm in that file.
export NE10_LINUX_TARGET_ARCH=armv7



# Cross-compiler selection. The arm-linux-gnueabihf-* entries below are
# commented out and kept only for reference; the arm-himix200-linux-* tools
# are used in their place.
# NOTE(review): this excerpt presumably sits inside the armv7 branch of the
# toolchain file (GNUlinux_config.cmake) - confirm against the full file.
#   set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
#   set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
#   set(CMAKE_ASM_COMPILER arm-linux-gnueabihf-as)
#   find_program(CMAKE_AR NAMES "arm-linux-gnueabihf-ar")
#   find_program(CMAKE_RANLIB NAMES "arm-linux-gnueabihf-ranlib")
   # C, C++ and assembler drivers are given by bare name, so the toolchain's
   # bin directory must be on PATH when CMake runs.
   set(CMAKE_C_COMPILER arm-himix200-linux-gcc)
   set(CMAKE_CXX_COMPILER arm-himix200-linux-g++)
   set(CMAKE_ASM_COMPILER arm-himix200-linux-as)
   # Locate the matching archiver and ranlib and store their paths in
   # CMAKE_AR / CMAKE_RANLIB.
   find_program(CMAKE_AR NAMES "arm-himix200-linux-ar")
   find_program(CMAKE_RANLIB NAMES "arm-himix200-linux-ranlib")


    # Adding cflags for armv7. Aarch64 does not need such flags.
    # The expansion is quoted so that an unset/empty NE10_TARGET_ARCH compares
    # as "" instead of producing a malformed if() expression.
    if("${NE10_TARGET_ARCH}" STREQUAL "armv7")
        # Previous setting (VFPv3 FPU), kept for reference:
#        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mthumb -march=armv7-a -mfloat-abi=${FLOAT_ABI} -mfpu=vfp3")
        # Select the NEON FPU. The space after ${CMAKE_C_FLAGS} is required:
        # without it the first new flag is fused onto the last existing one
        # whenever CMAKE_C_FLAGS is not empty.
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mthumb -march=armv7-a -mfloat-abi=${FLOAT_ABI}")
            # NOTE(review): the deeper indentation of the lines below suggests they
            # were originally guarded by a check on FLOAT_ABI that is not visible
            # in this excerpt - confirm against the full CMakeLists.txt.
            # "--no-warn-mismatch" is needed for linker to suppress linker error about not all functions use VFP register to pass argument, eg.
            #   .../arm-linux-androideabi/bin/ld: error: ..../test-float.o
            #           uses VFP register arguments, output does not
            # There is call convension mismatch between NDK's crt*.o and ne10's object files.
            # crt*.o still uses softfp while ne10's object files use hard floating point.
            # Refer $NDK/tests/device/hard-float/jni/Android.mk for more details.
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,--no-warn-mismatch")
        # Turn on asm optimization for Android on ARM v7.
        set(NE10_ASM_OPTIMIZATION on)
    endif()
    message("-- Loaded toolchain:
    message("-- CMAKE_C_FLAGS:
    # GNU/Linux, ARMv7: build C and assembly sources for NEON with the softfp
    # calling convention.
    if("${NE10_TARGET_ARCH}" STREQUAL "armv7")
      # Previous setting (VFPv3 FPU, no explicit float ABI), kept for reference:
#      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mthumb -march=armv7-a -mfpu=vfp3 -funsafe-math-optimizations")
      # The space after ${CMAKE_C_FLAGS} is required: without it the first new
      # flag is fused onto the last existing one whenever CMAKE_C_FLAGS is not
      # empty (for example when it was pre-populated before this point).
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mthumb -march=armv7-a -funsafe-math-optimizations -mfloat-abi=softfp")
      # Assembler flags start from the C flags and repeat the architecture, FPU
      # and float-ABI options.
      set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -mthumb -march=armv7-a -mfpu=neon -mfloat-abi=softfp")
      # Turn on asm optimization for Linux on ARM v7.
      set(NE10_ASM_OPTIMIZATION on)
    endif()



# Configure an out-of-source Ne10 build with the GNU/Linux toolchain file.
# Quoting keeps the cd working when NE10_PATH contains spaces; -p lets the
# steps be re-run when the build directory already exists.
cd "$NE10_PATH"
mkdir -p build && cd build
export NE10_LINUX_TARGET_ARCH=armv7                      # Can also be "aarch64"
cmake -DCMAKE_TOOLCHAIN_FILE=../GNUlinux_config.cmake ..
mike@ubuntu:/home/linux_test/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build$ cmake -DCMAKE_TOOLCHAIN_FILE=../GNUlinux_config.cmake ..
-- Target architecture: armv7
-- Building type: RELEASE
-- Configuring done
-- Generating done
mike@ubuntu:/home/linux_test/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build$ make
[ 92%] Built target NE10
[100%] Built target NE10_test_static


/mnt/test00/neon_sample/projectNe10-Ne10-v1.2.1-72-g1f059a7/projectNe10-Ne10-1f059a7/build/samples # ./NE10_test_static 
==== Ne10 Samples ===

# Introduction
        ne10_addc_float:        4.200939 + 0.987757 = 5.188695
        ne10_addc_float:        1.971915 + 0.987757 = 2.959671
        ne10_addc_float:        3.915496 + 0.987757 = 4.903253
        ne10_addc_float:        3.992200 + 0.987757 = 4.979957
        ne10_addc_float:        4.558237 + 0.987757 = 5.545994
        ne10_addc_float_c:      1.676114 + 3.144355 = 4.820468
        ne10_addc_float_neon:   1.676114 + 3.144355 = 4.820468
        ne10_addc_float_c:      3.841148 + 3.144355 = 6.985502
        ne10_addc_float_neon:   3.841148 + 3.144355 = 6.985502
        ne10_addc_float_c:      1.388874 + 3.144355 = 4.533228
        ne10_addc_float_neon:   1.388874 + 3.144355 = 4.533228
        ne10_addc_float_c:      2.769850 + 3.144355 = 5.914205
        ne10_addc_float_neon:   2.769850 + 3.144355 = 5.914205
        ne10_addc_float_c:      2.386985 + 3.144355 = 5.531340
        ne10_addc_float_neon:   2.386985 + 3.144355 = 5.531340

# Matrix Multiply
[  1.82  4.58  0.71     [  1.21  0.78  0.54     [  8.21 11.07 24.65
   2.57  3.18  3.03   *    0.69  2.00  4.99   =   17.50 10.35 20.58
   4.76  3.59  0.08 ]      4.02  0.65  1.09 ]      8.57 10.97 20.59 ]

[  2.56  1.48  2.47     [  3.86  2.00  1.76     [ 23.29 15.23 21.84
   4.20  3.19  4.86   *    2.63  4.46  4.04   =   43.30 29.50 42.62
   3.06  2.62  1.46 ]      3.85  1.42  4.60 ]     24.35 19.89 22.71 ]

[  0.35  0.43  4.45     [  0.10  1.19  4.25     [  2.42 22.58 14.07
   4.75  0.96  1.74   *    2.29  4.85  1.33   =    3.22 18.19 26.18
   2.63  3.32  0.32 ]      0.32  4.51  2.70 ]      7.95 20.67 16.48 ]

# Complex-to-Complex FFT
IN[ 0]:    18.7603 +    38.0124i        OUT[ 0]:   435.0166 +   517.7552i
IN[ 1]:    25.6268 +    33.3862i        OUT[ 1]:   -62.7354 +    33.7247i
IN[ 2]:    26.5803 +     1.9640i        OUT[ 2]:   -44.8078 +   -14.8473i
IN[ 3]:    21.8819 +    46.5918i        OUT[ 3]:  -113.4578 +   -17.2272i
IN[ 4]:    46.5405 +    36.0476i        OUT[ 4]:    42.9251 +    16.5104i
IN[ 5]:    14.2147 +    36.9267i        OUT[ 5]:    66.1855 +    16.9207i
IN[ 6]:    31.9989 +    17.7024i        OUT[ 6]:   -59.6584 +    40.7193i
IN[ 7]:    34.3931 +     8.2987i        OUT[ 7]:    63.1073 +   -38.3605i
IN[ 8]:    22.0052 +    44.0038i        OUT[ 8]:    19.2536 +    -1.6965i
IN[ 9]:    41.4601 +    16.5169i        OUT[ 9]:   -23.9881 +   -98.8069i
IN[10]:    11.4484 +    44.6686i        OUT[10]:   -96.9254 +    79.8168i
IN[11]:    17.5180 +    34.3335i        OUT[11]:   -14.1841 +   -12.7989i
IN[12]:    47.8234 +    29.4320i        OUT[12]:    43.3225 +    57.4142i
IN[13]:    32.8652 +    42.9338i        OUT[13]:    34.0209 +    29.3279i
IN[14]:    21.9780 +    46.1985i        OUT[14]:   -13.0017 +   -39.5426i
IN[15]:    19.9218 +    40.7383i        OUT[15]:    25.0927 +    39.2896i

# Real-to-Complex FFT
IN[ 0]:    34.2109      OUT[ 0]:   446.6450 +     0.0000i
IN[ 1]:    45.5486      OUT[ 1]:   -13.2045 +   -39.7665i
IN[ 2]:    24.1245      OUT[ 2]:     5.1069 +    -7.8707i
IN[ 3]:    10.7912      OUT[ 3]:    24.3686 +   -18.2637i
IN[ 4]:    47.5126      OUT[ 4]:    68.2387 +   -48.3229i
IN[ 5]:    46.0064      OUT[ 5]:   -31.5288 +   -65.5524i
IN[ 6]:     7.3830      OUT[ 6]:   -46.2024 +    42.8686i
IN[ 7]:    44.0531      OUT[ 7]:    28.9923 +   -54.2053i
IN[ 8]:    32.0540      OUT[ 8]:    29.1885 +     0.0000i
IN[ 9]:    21.5977
IN[10]:    30.9798
IN[11]:    14.0530
IN[12]:    39.3001
IN[13]:    15.3729
IN[14]:    22.3517
IN[15]:    11.3053

        b[0] = 2.0825
        b[1] = 2.7822
        b[2] = 1.3812
        b[3] = 0.9377
IN[ 0]:    3.3921       OUT[ 0]:    7.0642
IN[ 1]:   18.1361       OUT[ 1]:   47.2062
IN[ 2]:    2.0634       OUT[ 2]:   59.4408
IN[ 3]:    2.5215       OUT[ 3]:   39.2217
IN[ 4]:    9.9089       OUT[ 4]:   47.5062
IN[ 5]:   15.2095       OUT[ 5]:   64.6600
IN[ 6]:   19.6950       OUT[ 6]:   99.3814
IN[ 7]:   18.7001       OUT[ 7]:  124.0371
IN[ 8]:   13.6889       OUT[ 8]:  121.9986
IN[ 9]:    7.6638       OUT[ 9]:   98.3408
IN[10]:   14.9954       OUT[10]:   88.9915
IN[11]:    7.3733       OUT[11]:   80.4960
IN[12]:    5.8832       OUT[12]:   60.6632
IN[13]:    4.6452       OUT[13]:   50.2865
IN[14]:   11.6898       OUT[14]:   52.3075
IN[15]:    4.8883       OUT[15]:   54.6357
