nopenpilot/third_party/acados/include/blasfeo/include/blasfeo_block_size.h

372 lines
13 KiB
C

/**************************************************************************************************
* *
* This file is part of BLASFEO. *
* *
* BLASFEO -- BLAS For Embedded Optimization. *
* Copyright (C) 2019 by Gianluca Frison. *
* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. *
* All rights reserved. *
* *
* The 2-Clause BSD License *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are met: *
* *
* 1. Redistributions of source code must retain the above copyright notice, this *
* list of conditions and the following disclaimer. *
* 2. Redistributions in binary form must reproduce the above copyright notice, *
* this list of conditions and the following disclaimer in the documentation *
* and/or other materials provided with the distribution. *
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND *
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED *
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE *
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR *
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; *
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de *
* *
**************************************************************************************************/
#ifndef BLASFEO_BLOCK_SIZE_H_
#define BLASFEO_BLOCK_SIZE_H_
#define D_EL_SIZE 8 // double precision
#define S_EL_SIZE 4 // single precision
#if defined( TARGET_X64_INTEL_HASWELL )
// common
#define CACHE_LINE_SIZE 64 // data cache size: 64 bytes
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way
#define L2_CACHE_SIZE (256*1024) // L2 data cache size: 256 kB ; DTLB1 64*4 kB = 256 kB
#define LLC_CACHE_SIZE (6*1024*1024) // LLC cache size: 6 MB ; TLB 1024*4 kB = 4 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 12 // max kernel size
#define D_KC 256 // 192
#define D_NC 72 //96 //72 // 120 // 512
#define D_MC 1500 // 6000
// single
#define S_PS 8 // panel size
#define S_PLD 4 // 2 // GCD of panel length
#define S_M_KERNEL 24 // max kernel size
#define S_KC 256
#define S_NC 144
#define S_MC 3000
#elif defined( TARGET_X64_INTEL_SANDY_BRIDGE )
// common
#define CACHE_LINE_SIZE 64 // data cache size: 64 bytes
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way
#define L2_CACHE_SIZE (256*1024) // L2 data cache size: 256 kB ; DTLB1 64*4 kB = 256 kB
#define LLC_CACHE_SIZE (4*1024*1024) // LLC cache size: 4 MB ; TLB 1024*4 kB = 4 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_KC 256 //320 //256 //320
#define D_NC 72 //64 //72 //60 // 120
#define D_MC 1000 // 800
// single
#define S_PS 8 // panel size
#define S_PLD 4 // 2 // GCD of panel length
#define S_M_KERNEL 16 // max kernel size
#define S_KC 256
#define S_NC 144
#define S_MC 2000
#elif defined( TARGET_X64_INTEL_CORE )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_X64_AMD_BULLDOZER )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_X86_AMD_JAGUAR )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_X86_AMD_BARCELONA )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A76)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (64*1024) // L1 data cache size: 64 kB, 4-way ; DTLB1 48*4 kB = 192 kB
#define LLC_CACHE_SIZE (1*1024*1024) // LLC cache size: 1 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_KC 512 //256
#define D_NC 128 //256
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_KC 512
#define S_NC 256
#define S_MC 6000
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A73)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 (64?) kB, 4-way, seen as 8-(16-)way ; DTLB1 48*4 kB = 192 kB
#define LLC_CACHE_SIZE (1*1024*1024) // LLC cache size: 1 MB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_KC 320
#define D_NC 256
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A57)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 2-way ; DTLB1 32*4 kB = 128 kB
#define LLC_CACHE_SIZE (1*1024*1024) // LLC cache size: 1 MB // 2 MB ???
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 8 // max kernel size
#define D_KC 224 //256 //192
#define D_NC 40 //36 //48
#define D_MC 512 //488 //600
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A55)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 4-way ; DTLB1 16*4 kB = 64 kB
#define LLC_CACHE_SIZE (512*1024) // LLC cache size: 512 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 12 // max kernel size
#define D_KC 224
#define D_NC 160
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined(TARGET_ARMV8A_ARM_CORTEX_A53)
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 4-way ??? ; DTLB1 10*4 kB = 40 kB
#define LLC_CACHE_SIZE (256*1024) // LLC cache size: 256 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 12 // max kernel size
#define D_KC 160
#define D_NC 128
#define D_MC 6000
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 8 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_ARMV7A_ARM_CORTEX_A15 )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_ARMV7A_ARM_CORTEX_A7 )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_ARMV7A_ARM_CORTEX_A9 )
// common
#define CACHE_LINE_SIZE 32
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#elif defined( TARGET_GENERIC )
// common
#define CACHE_LINE_SIZE 64
#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB
// double
#define D_PS 4 // panel size
#define D_PLD 4 // 2 // GCD of panel length
#define D_M_KERNEL 4 // max kernel size
#define D_KC 256
#define D_NC 128 // TODO these are just dummy
#define D_MC 3000 // TODO these are just dummy
// single
#define S_PS 4
#define S_PLD 4 //2
#define S_M_KERNEL 4 // max kernel size
#define S_KC 256
#define S_NC 128 // TODO these are just dummy
#define S_MC 3000 // TODO these are just dummy
#else
#error "Unknown architecture"
#endif
#define D_CACHE_LINE_EL (CACHE_LINE_SIZE/D_EL_SIZE)
#define D_L1_CACHE_EL (L1_CACHE_SIZE/D_EL_SIZE)
#define D_L2_CACHE_EL (L2_CACHE_SIZE/D_EL_SIZE)
#define D_LLC_CACHE_EL (LLC_CACHE_SIZE/D_EL_SIZE)
#define S_CACHE_LINE_EL (CACHE_LINE_SIZE/S_EL_SIZE)
#define S_L1_CACHE_EL (L1_CACHE_SIZE/S_EL_SIZE)
#define S_L2_CACHE_EL (L2_CACHE_SIZE/S_EL_SIZE)
#define S_LLC_CACHE_EL (LLC_CACHE_SIZE/S_EL_SIZE)
#endif // BLASFEO_BLOCK_SIZE_H_