#ifndef WORKLOAD_H
#define WORKLOAD_H

#include <stdint.h>
#include <stdbool.h>

/*
#define BLOCK_xN 1
#define LD_xN_RAND_AVX512 LD_x1_RAND_AVX512
#define STWB_xN_RAND_AVX512 STWB_x1_RAND_AVX512
#define NTLD_xN_RAND_AVX512 NTLD_x1_RAND_AVX512
#define NTST_xN_RAND_AVX512 NTST_x1_RAND_AVX512
*/

/*
#define BLOCK_xN 8
#define LD_xN_RAND_AVX512 LD_x8_RAND_AVX512
#define STWB_xN_RAND_AVX512 STWB_x8_RAND_AVX512
#define NTLD_xN_RAND_AVX512 NTLD_x8_RAND_AVX512
#define NTST_xN_RAND_AVX512 NTST_x8_RAND_AVX512
*/

#define BLOCK_xN 16
#define LD_xN_RAND_AVX512 LD_x16_RAND_AVX512
#define STWB_xN_RAND_AVX512 STWB_x16_RAND_AVX512
#define NTLD_xN_RAND_AVX512 NTLD_x16_RAND_AVX512
#define NTST_xN_RAND_AVX512 NTST_x16_RAND_AVX512

//#define LD_xN_RAND_AVX512 LD_LFENCE_x16_RAND_AVX512
//#define STWB_xN_RAND_AVX512 STWB_SFENCE_x16_RAND_AVX512
//#define NTLD_xN_RAND_AVX512 NTLD_LFENCE_x16_RAND_AVX512
//#define NTST_xN_RAND_AVX512 NTST_SFENCE_x16_RAND_AVX512

/*
#define BLOCK_xN 32
#define LD_xN_RAND_AVX512 LD_x32_RAND_AVX512
#define STWB_xN_RAND_AVX512 STWB_x32_RAND_AVX512
#define NTLD_xN_RAND_AVX512 NTLD_x32_RAND_AVX512
#define NTST_xN_RAND_AVX512 NTST_x32_RAND_AVX512
*/

void op_ntld(char* addr, long size);
void op_ld(char* addr, long size);
void op_ntst(char* addr, long size);
void op_st(char* addr, long size);
void op_stall();
void op_movdir64B(char* src_addr, char* dst_addr, long size);
void op_mixed(char* addr, long size, int ratio);

uint64_t op_ntld_32B_lat(char* addr);
uint64_t op_ntld_64B_lat(char* addr);
uint64_t op_ntst_64B_lat(char* addr);
uint64_t op_ld_64B_lat(char* addr);
uint64_t op_st_64B_lat(char* addr);
uint64_t op_st_cl_flush_64B_lat(char* addr);
uint64_t op_st_32B_lat(char* addr);
uint64_t op_ptr_chase(char* addr, uint64_t num_chase_block);

uint64_t op_ld_block_lat(char* addr, bool flush_block, long num_clear_pipe);
uint64_t op_ntld_block_lat(char* addr, bool flush_block, long num_clear_pipe);
uint64_t op_stwb_block_lat(char* addr, bool flush_block, long num_clear_pipe);
uint64_t op_ntst_block_lat(char* addr, bool flush_block, long num_clear_pipe);

/* Assembly to perform non-temporal load */
#define SIZEBTLD_64_AVX512 \
    "vmovntdqa 0x0(%%r9, %%r10), %%zmm0 \n" \
    "add $0x40, %%r10 \n"

#define SIZEBTLD_128_AVX512 \
    "vmovntdqa 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovntdqa 0x40(%%r9, %%r10), %%zmm1 \n" \
    "add $0x80, %%r10 \n"

#define SIZEBTLD_256_AVX512 \
    "vmovntdqa 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovntdqa 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovntdqa 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovntdqa 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "add $0x100, %%r10 \n"

#define SIZEBTLD_512_AVX512 \
    "vmovntdqa 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovntdqa 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovntdqa 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovntdqa 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "vmovntdqa 0x100(%%r9, %%r10), %%zmm4 \n" \
    "vmovntdqa 0x140(%%r9, %%r10), %%zmm5 \n" \
    "vmovntdqa 0x180(%%r9, %%r10), %%zmm6 \n" \
    "vmovntdqa 0x1c0(%%r9, %%r10), %%zmm7 \n" \
    "add $0x200, %%r10 \n"

#define SIZEBTLD_1024_AVX512 \
    "vmovntdqa 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovntdqa 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovntdqa 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovntdqa 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "vmovntdqa 0x100(%%r9, %%r10), %%zmm4 \n" \
    "vmovntdqa 0x140(%%r9, %%r10), %%zmm5 \n" \
    "vmovntdqa 0x180(%%r9, %%r10), %%zmm6 \n" \
    "vmovntdqa 0x1c0(%%r9, %%r10), %%zmm7 \n" \
    "vmovntdqa 0x200(%%r9, %%r10), %%zmm8 \n" \
    "vmovntdqa 0x240(%%r9, %%r10), %%zmm9 \n" \
    "vmovntdqa 0x280(%%r9, %%r10), %%zmm10 \n" \
    "vmovntdqa 0x2c0(%%r9, %%r10), %%zmm11 \n" \
    "vmovntdqa 0x300(%%r9, %%r10), %%zmm12 \n" \
    "vmovntdqa 0x340(%%r9, %%r10), %%zmm13 \n" \
    "vmovntdqa 0x380(%%r9, %%r10), %%zmm14 \n" \
    "vmovntdqa 0x3c0(%%r9, %%r10), %%zmm15 \n" \
    "add $0x400, %%r10 \n"
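/*
 * Illustrative sketch (not part of the original API): one plausible way to
 * drive a sequential SIZEBTLD_* block, assuming %r9 holds the buffer base and
 * %r10 the running byte offset, as the strings above imply.  The buffer is
 * assumed 64B-aligned and `size` a multiple of 0x100; the function name
 * `example_ntld_sweep` is hypothetical and the real op_ntld() may differ.
 */
static inline void example_ntld_sweep(char* addr, long size)
{
    asm volatile(
        "mov %[addr], %%r9 \n"      /* r9  = buffer base */
        "xor %%r10, %%r10 \n"       /* r10 = running byte offset */
        "1: \n"
        SIZEBTLD_256_AVX512         /* 4 x 64B NT loads, advances r10 by 0x100 */
        "cmp %[size], %%r10 \n"
        "jl 1b \n"
        :
        : [addr] "r"(addr), [size] "r"(size)
        : "r9", "r10", "xmm0", "xmm1", "xmm2", "xmm3", "memory");
}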
#define NTLD_x1_RAND_AVX512 \
    "vmovntdqa 0xd6c0(%%r11, %%r10), %%zmm0 \n"

#define NTLD_x8_RAND_AVX512 \
    "vmovntdqa 0xd6c0(%%r11, %%r10), %%zmm0 \n" \
    "vmovntdqa 0xb680(%%r11, %%r10), %%zmm1 \n" \
    "vmovntdqa 0x7040(%%r11, %%r10), %%zmm2 \n" \
    "vmovntdqa 0x36c0(%%r11, %%r10), %%zmm3 \n" \
    "vmovntdqa 0x3b80(%%r11, %%r10), %%zmm4 \n" \
    "vmovntdqa 0x9340(%%r11, %%r10), %%zmm5 \n" \
    "vmovntdqa 0x9ec0(%%r11, %%r10), %%zmm6 \n" \
    "vmovntdqa 0x6e80(%%r11, %%r10), %%zmm7 \n"

#define NTLD_x16_RAND_AVX512 \
    "vmovntdqa 0xc840(%%r11, %%r10), %%zmm0 \n" \
    "vmovntdqa 0xf180(%%r11, %%r10), %%zmm1 \n" \
    "vmovntdqa 0xce40(%%r11, %%r10), %%zmm2 \n" \
    "vmovntdqa 0x300(%%r11, %%r10), %%zmm3 \n" \
    "vmovntdqa 0x6d40(%%r11, %%r10), %%zmm4 \n" \
    "vmovntdqa 0xa440(%%r11, %%r10), %%zmm5 \n" \
    "vmovntdqa 0xa9c0(%%r11, %%r10), %%zmm6 \n" \
    "vmovntdqa 0xe980(%%r11, %%r10), %%zmm7 \n" \
    "vmovntdqa 0xc940(%%r11, %%r10), %%zmm8 \n" \
    "vmovntdqa 0x8200(%%r11, %%r10), %%zmm9 \n" \
    "vmovntdqa 0xbac0(%%r11, %%r10), %%zmm10 \n" \
    "vmovntdqa 0x8940(%%r11, %%r10), %%zmm11 \n" \
    "vmovntdqa 0xe700(%%r11, %%r10), %%zmm12 \n" \
    "vmovntdqa 0xe100(%%r11, %%r10), %%zmm13 \n" \
    "vmovntdqa 0x8f40(%%r11, %%r10), %%zmm14 \n" \
    "vmovntdqa 0xf2c0(%%r11, %%r10), %%zmm15 \n"

#define NTLD_x32_RAND_AVX512 \
    "vmovntdqa 0x3d80(%%r11, %%r10), %%zmm0 \n" \
    "vmovntdqa 0x1780(%%r11, %%r10), %%zmm1 \n" \
    "vmovntdqa 0x4700(%%r11, %%r10), %%zmm2 \n" \
    "vmovntdqa 0xb980(%%r11, %%r10), %%zmm3 \n" \
    "vmovntdqa 0xaa00(%%r11, %%r10), %%zmm4 \n" \
    "vmovntdqa 0xad00(%%r11, %%r10), %%zmm5 \n" \
    "vmovntdqa 0x9a40(%%r11, %%r10), %%zmm6 \n" \
    "vmovntdqa 0x5300(%%r11, %%r10), %%zmm7 \n" \
    "vmovntdqa 0x7d40(%%r11, %%r10), %%zmm8 \n" \
    "vmovntdqa 0xf480(%%r11, %%r10), %%zmm9 \n" \
    "vmovntdqa 0x9480(%%r11, %%r10), %%zmm10 \n" \
    "vmovntdqa 0xbd80(%%r11, %%r10), %%zmm11 \n" \
    "vmovntdqa 0x3fc0(%%r11, %%r10), %%zmm12 \n" \
    "vmovntdqa 0xcdc0(%%r11, %%r10), %%zmm13 \n" \
    "vmovntdqa 0x480(%%r11, %%r10), %%zmm14 \n" \
    "vmovntdqa 0xb400(%%r11, %%r10), %%zmm15 \n" \
    "vmovntdqa 0xb500(%%r11, %%r10), %%zmm16 \n" \
    "vmovntdqa 0x49c0(%%r11, %%r10), %%zmm17 \n" \
    "vmovntdqa 0x3380(%%r11, %%r10), %%zmm18 \n" \
    "vmovntdqa 0x36c0(%%r11, %%r10), %%zmm19 \n" \
    "vmovntdqa 0x14c0(%%r11, %%r10), %%zmm20 \n" \
    "vmovntdqa 0xcc80(%%r11, %%r10), %%zmm21 \n" \
    "vmovntdqa 0xb600(%%r11, %%r10), %%zmm22 \n" \
    "vmovntdqa 0x6840(%%r11, %%r10), %%zmm23 \n" \
    "vmovntdqa 0x6c80(%%r11, %%r10), %%zmm24 \n" \
    "vmovntdqa 0x2c0(%%r11, %%r10), %%zmm25 \n" \
    "vmovntdqa 0x62c0(%%r11, %%r10), %%zmm26 \n" \
    "vmovntdqa 0x79c0(%%r11, %%r10), %%zmm27 \n" \
    "vmovntdqa 0xfe40(%%r11, %%r10), %%zmm28 \n" \
    "vmovntdqa 0xc200(%%r11, %%r10), %%zmm29 \n" \
    "vmovntdqa 0x58c0(%%r11, %%r10), %%zmm30 \n" \
    "vmovntdqa 0x9b40(%%r11, %%r10), %%zmm31 \n"
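/*
 * Illustrative sketch (assumption, not the original op_ntld_block_lat code):
 * the *_RAND_AVX512 blocks appear to treat %r11 as a 64KB block base and
 * %r10 as an extra offset, touching BLOCK_xN cache lines at fixed
 * pseudo-random offsets per expansion.  `example_ntld_rand_blocks` is a
 * hypothetical driver that walks a buffer one 64KB block at a time with
 * %r10 held at zero; the clobber list matches BLOCK_xN == 16 (zmm0-zmm15).
 */
static inline void example_ntld_rand_blocks(char* addr, long size)
{
    for (long block = 0; block + 0x10000 <= size; block += 0x10000) {
        asm volatile(
            "mov %[base], %%r11 \n" /* r11 = base of the current 64KB block */
            "xor %%r10, %%r10 \n"   /* r10 = extra offset, fixed at zero here */
            NTLD_xN_RAND_AVX512     /* BLOCK_xN scattered 64B NT loads */
            :
            : [base] "r"(addr + block)
            : "r10", "r11", "memory",
              "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
              "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
              "xmm15");
    }
}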
"vmovntdq %%zmm0, 0x80(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0xc0(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x100(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x140(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x180(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x1c0(%%r9, %%r10) \n" \ "add $0x200, %%r10 \n" #define SIZEBTNT_1024_AVX512 \ "vmovntdq %%zmm0, 0x0(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x40(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x80(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0xc0(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x100(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x140(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x180(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x1c0(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x200(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x240(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x280(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x2c0(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x300(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x340(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x380(%%r9, %%r10) \n" \ "vmovntdq %%zmm0, 0x3c0(%%r9, %%r10) \n" \ "add $0x400, %%r10 \n" #define NTST_x1_RAND_AVX512 \ "vmovntdq %%zmm0, 0x9680(%%r11, %%r10) \n" #define NTST_x8_RAND_AVX512 \ "vmovntdq %%zmm0, 0x9680(%%r11, %%r10) \n" \ "vmovntdq %%zmm1, 0x15c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm2, 0x4a80(%%r11, %%r10) \n" \ "vmovntdq %%zmm3, 0xb800(%%r11, %%r10) \n" \ "vmovntdq %%zmm4, 0x9700(%%r11, %%r10) \n" \ "vmovntdq %%zmm5, 0x2000(%%r11, %%r10) \n" \ "vmovntdq %%zmm6, 0x8d40(%%r11, %%r10) \n" \ "vmovntdq %%zmm7, 0xb640(%%r11, %%r10) \n" #define NTST_x16_RAND_AVX512 \ "vmovntdq %%zmm0, 0x3680(%%r11, %%r10) \n" \ "vmovntdq %%zmm1, 0x4140(%%r11, %%r10) \n" \ "vmovntdq %%zmm2, 0x2cc0(%%r11, %%r10) \n" \ "vmovntdq %%zmm3, 0x28c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm4, 0x8440(%%r11, %%r10) \n" \ "vmovntdq %%zmm5, 0xec40(%%r11, %%r10) \n" \ "vmovntdq %%zmm6, 0x1080(%%r11, %%r10) \n" \ "vmovntdq %%zmm7, 0x6e00(%%r11, %%r10) \n" \ "vmovntdq %%zmm8, 0x3300(%%r11, %%r10) \n" \ "vmovntdq %%zmm9, 0xef80(%%r11, %%r10) \n" \ "vmovntdq %%zmm10, 0xb900(%%r11, %%r10) \n" \ "vmovntdq %%zmm11, 0x2280(%%r11, %%r10) \n" \ "vmovntdq %%zmm12, 0x85c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm13, 0x240(%%r11, %%r10) \n" \ "vmovntdq %%zmm14, 0x40c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm15, 0x3100(%%r11, %%r10) \n" #define NTST_x32_RAND_AVX512 \ "vmovntdq %%zmm0, 0x4240(%%r11, %%r10) \n" \ "vmovntdq %%zmm1, 0x6400(%%r11, %%r10) \n" \ "vmovntdq %%zmm2, 0xe4c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm3, 0xf200(%%r11, %%r10) \n" \ "vmovntdq %%zmm4, 0xc400(%%r11, %%r10) \n" \ "vmovntdq %%zmm5, 0x9e80(%%r11, %%r10) \n" \ "vmovntdq %%zmm6, 0xaf80(%%r11, %%r10) \n" \ "vmovntdq %%zmm7, 0xb380(%%r11, %%r10) \n" \ "vmovntdq %%zmm8, 0xc7c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm9, 0x65c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm10, 0x5b40(%%r11, %%r10) \n" \ "vmovntdq %%zmm11, 0x8640(%%r11, %%r10) \n" \ "vmovntdq %%zmm12, 0x67c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm13, 0xaa80(%%r11, %%r10) \n" \ "vmovntdq %%zmm14, 0x7640(%%r11, %%r10) \n" \ "vmovntdq %%zmm15, 0x6d40(%%r11, %%r10) \n" \ "vmovntdq %%zmm16, 0x1400(%%r11, %%r10) \n" \ "vmovntdq %%zmm17, 0x3fc0(%%r11, %%r10) \n" \ "vmovntdq %%zmm18, 0x6640(%%r11, %%r10) \n" \ "vmovntdq %%zmm19, 0x1f40(%%r11, %%r10) \n" \ "vmovntdq %%zmm20, 0x3a00(%%r11, %%r10) \n" \ "vmovntdq %%zmm21, 0x1080(%%r11, %%r10) \n" \ "vmovntdq %%zmm22, 0x9c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm23, 0xf80(%%r11, %%r10) \n" \ "vmovntdq %%zmm24, 0xcb00(%%r11, %%r10) \n" \ "vmovntdq %%zmm25, 0x7e80(%%r11, %%r10) \n" \ "vmovntdq %%zmm26, 0x99c0(%%r11, %%r10) \n" \ "vmovntdq %%zmm27, 0x680(%%r11, %%r10) \n" \ "vmovntdq 
/* temporal load */
#define SIZELD_1024_AVX512 \
    "vmovdqa64 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovdqa64 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovdqa64 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "vmovdqa64 0x100(%%r9, %%r10), %%zmm4 \n" \
    "vmovdqa64 0x140(%%r9, %%r10), %%zmm5 \n" \
    "vmovdqa64 0x180(%%r9, %%r10), %%zmm6 \n" \
    "vmovdqa64 0x1c0(%%r9, %%r10), %%zmm7 \n" \
    "vmovdqa64 0x200(%%r9, %%r10), %%zmm8 \n" \
    "vmovdqa64 0x240(%%r9, %%r10), %%zmm9 \n" \
    "vmovdqa64 0x280(%%r9, %%r10), %%zmm10 \n" \
    "vmovdqa64 0x2c0(%%r9, %%r10), %%zmm11 \n" \
    "vmovdqa64 0x300(%%r9, %%r10), %%zmm12 \n" \
    "vmovdqa64 0x340(%%r9, %%r10), %%zmm13 \n" \
    "vmovdqa64 0x380(%%r9, %%r10), %%zmm14 \n" \
    "vmovdqa64 0x3c0(%%r9, %%r10), %%zmm15 \n" \
    "add $0x400, %%r10 \n"

#define LD_x1_RAND_AVX512 \
    "vmovdqa64 0x4140(%%r11, %%r10), %%zmm0 \n"

#define LD_x8_RAND_AVX512 \
    "vmovdqa64 0x4140(%%r11, %%r10), %%zmm0 \n" \
    "vmovdqa64 0xf340(%%r11, %%r10), %%zmm1 \n" \
    "vmovdqa64 0x2640(%%r11, %%r10), %%zmm2 \n" \
    "vmovdqa64 0x1000(%%r11, %%r10), %%zmm3 \n" \
    "vmovdqa64 0xda40(%%r11, %%r10), %%zmm4 \n" \
    "vmovdqa64 0x5200(%%r11, %%r10), %%zmm5 \n" \
    "vmovdqa64 0x180(%%r11, %%r10), %%zmm6 \n" \
    "vmovdqa64 0xf3c0(%%r11, %%r10), %%zmm7 \n"

#define LD_LFENCE_x16_RAND_AVX512 \
    "vmovdqa64 0x2a80(%%r11, %%r10), %%zmm0 \n lfence \n" \
    "vmovdqa64 0x680(%%r11, %%r10), %%zmm1 \n lfence \n" \
    "vmovdqa64 0x8500(%%r11, %%r10), %%zmm2 \n lfence \n" \
    "vmovdqa64 0x8980(%%r11, %%r10), %%zmm3 \n lfence \n" \
    "vmovdqa64 0x6d40(%%r11, %%r10), %%zmm4 \n lfence \n" \
    "vmovdqa64 0xf7c0(%%r11, %%r10), %%zmm5 \n lfence \n" \
    "vmovdqa64 0x4640(%%r11, %%r10), %%zmm6 \n lfence \n" \
    "vmovdqa64 0x1480(%%r11, %%r10), %%zmm7 \n lfence \n" \
    "vmovdqa64 0x2f00(%%r11, %%r10), %%zmm8 \n lfence \n" \
    "vmovdqa64 0x15c0(%%r11, %%r10), %%zmm9 \n lfence \n" \
    "vmovdqa64 0xf100(%%r11, %%r10), %%zmm10 \n lfence \n" \
    "vmovdqa64 0x66c0(%%r11, %%r10), %%zmm11 \n lfence \n" \
    "vmovdqa64 0xe240(%%r11, %%r10), %%zmm12 \n lfence \n" \
    "vmovdqa64 0xf480(%%r11, %%r10), %%zmm13 \n lfence \n" \
    "vmovdqa64 0x84c0(%%r11, %%r10), %%zmm14 \n lfence \n" \
    "vmovdqa64 0xe480(%%r11, %%r10), %%zmm15 \n lfence \n"

#define LD_x16_RAND_AVX512 \
    "vmovdqa64 0xc300(%%r11, %%r10), %%zmm0 \n" \
    "vmovdqa64 0xda00(%%r11, %%r10), %%zmm1 \n" \
    "vmovdqa64 0x1980(%%r11, %%r10), %%zmm2 \n" \
    "vmovdqa64 0xddc0(%%r11, %%r10), %%zmm3 \n" \
    "vmovdqa64 0xaa00(%%r11, %%r10), %%zmm4 \n" \
    "vmovdqa64 0x5540(%%r11, %%r10), %%zmm5 \n" \
    "vmovdqa64 0x6740(%%r11, %%r10), %%zmm6 \n" \
    "vmovdqa64 0x5a80(%%r11, %%r10), %%zmm7 \n" \
    "vmovdqa64 0xa680(%%r11, %%r10), %%zmm8 \n" \
    "vmovdqa64 0xdb00(%%r11, %%r10), %%zmm9 \n" \
    "vmovdqa64 0x3340(%%r11, %%r10), %%zmm10 \n" \
    "vmovdqa64 0x7e40(%%r11, %%r10), %%zmm11 \n" \
    "vmovdqa64 0x3600(%%r11, %%r10), %%zmm12 \n" \
    "vmovdqa64 0x5080(%%r11, %%r10), %%zmm13 \n" \
    "vmovdqa64 0x6e00(%%r11, %%r10), %%zmm14 \n" \
    "vmovdqa64 0x1540(%%r11, %%r10), %%zmm15 \n"

#define LD_x32_RAND_AVX512 \
    "vmovdqa64 0x7b40(%%r11, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x7640(%%r11, %%r10), %%zmm1 \n" \
    "vmovdqa64 0xdf00(%%r11, %%r10), %%zmm2 \n" \
    "vmovdqa64 0xdb40(%%r11, %%r10), %%zmm3 \n" \
    "vmovdqa64 0xb6c0(%%r11, %%r10), %%zmm4 \n" \
    "vmovdqa64 0x6980(%%r11, %%r10), %%zmm5 \n" \
    "vmovdqa64 0xf280(%%r11, %%r10), %%zmm6 \n" \
    "vmovdqa64 0x3dc0(%%r11, %%r10), %%zmm7 \n" \
    "vmovdqa64 0x6d80(%%r11, %%r10), %%zmm8 \n" \
    "vmovdqa64 0xf580(%%r11, %%r10), %%zmm9 \n" \
    "vmovdqa64 0xf300(%%r11, %%r10), %%zmm10 \n" \
    "vmovdqa64 0x3140(%%r11, %%r10), %%zmm11 \n" \
    "vmovdqa64 0x8980(%%r11, %%r10), %%zmm12 \n" \
    "vmovdqa64 0xecc0(%%r11, %%r10), %%zmm13 \n" \
    "vmovdqa64 0xc5c0(%%r11, %%r10), %%zmm14 \n" \
    "vmovdqa64 0x1e40(%%r11, %%r10), %%zmm15 \n" \
    "vmovdqa64 0xf3c0(%%r11, %%r10), %%zmm16 \n" \
    "vmovdqa64 0xe800(%%r11, %%r10), %%zmm17 \n" \
    "vmovdqa64 0x2200(%%r11, %%r10), %%zmm18 \n" \
    "vmovdqa64 0x66c0(%%r11, %%r10), %%zmm19 \n" \
    "vmovdqa64 0xc00(%%r11, %%r10), %%zmm20 \n" \
    "vmovdqa64 0x2bc0(%%r11, %%r10), %%zmm21 \n" \
    "vmovdqa64 0x6a80(%%r11, %%r10), %%zmm22 \n" \
    "vmovdqa64 0x94c0(%%r11, %%r10), %%zmm23 \n" \
    "vmovdqa64 0xbec0(%%r11, %%r10), %%zmm24 \n" \
    "vmovdqa64 0xcdc0(%%r11, %%r10), %%zmm25 \n" \
    "vmovdqa64 0xf80(%%r11, %%r10), %%zmm26 \n" \
    "vmovdqa64 0xc000(%%r11, %%r10), %%zmm27 \n" \
    "vmovdqa64 0x4340(%%r11, %%r10), %%zmm28 \n" \
    "vmovdqa64 0x4640(%%r11, %%r10), %%zmm29 \n" \
    "vmovdqa64 0xcc0(%%r11, %%r10), %%zmm30 \n" \
    "vmovdqa64 0x6b40(%%r11, %%r10), %%zmm31 \n"
\n" \ "vmovdqa64 0xf580(%%r11, %%r10), %%zmm9 \n" \ "vmovdqa64 0xf300(%%r11, %%r10), %%zmm10 \n" \ "vmovdqa64 0x3140(%%r11, %%r10), %%zmm11 \n" \ "vmovdqa64 0x8980(%%r11, %%r10), %%zmm12 \n" \ "vmovdqa64 0xecc0(%%r11, %%r10), %%zmm13 \n" \ "vmovdqa64 0xc5c0(%%r11, %%r10), %%zmm14 \n" \ "vmovdqa64 0x1e40(%%r11, %%r10), %%zmm15 \n" \ "vmovdqa64 0xf3c0(%%r11, %%r10), %%zmm16 \n" \ "vmovdqa64 0xe800(%%r11, %%r10), %%zmm17 \n" \ "vmovdqa64 0x2200(%%r11, %%r10), %%zmm18 \n" \ "vmovdqa64 0x66c0(%%r11, %%r10), %%zmm19 \n" \ "vmovdqa64 0xc00(%%r11, %%r10), %%zmm20 \n" \ "vmovdqa64 0x2bc0(%%r11, %%r10), %%zmm21 \n" \ "vmovdqa64 0x6a80(%%r11, %%r10), %%zmm22 \n" \ "vmovdqa64 0x94c0(%%r11, %%r10), %%zmm23 \n" \ "vmovdqa64 0xbec0(%%r11, %%r10), %%zmm24 \n" \ "vmovdqa64 0xcdc0(%%r11, %%r10), %%zmm25 \n" \ "vmovdqa64 0xf80(%%r11, %%r10), %%zmm26 \n" \ "vmovdqa64 0xc000(%%r11, %%r10), %%zmm27 \n" \ "vmovdqa64 0x4340(%%r11, %%r10), %%zmm28 \n" \ "vmovdqa64 0x4640(%%r11, %%r10), %%zmm29 \n" \ "vmovdqa64 0xcc0(%%r11, %%r10), %%zmm30 \n" \ "vmovdqa64 0x6b40(%%r11, %%r10), %%zmm31 \n" #define STWB_x1_RAND_AVX512 \ "vmovdqa64 %%zmm0, 0xe80(%%r11, %%r10) \n clwb 0xe80(%%r11, %%r10) \n" #define STWB_x8_RAND_AVX512 \ "vmovdqa64 %%zmm0, 0xe80(%%r11, %%r10) \n clwb 0xe80(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm1, 0xe4c0(%%r11, %%r10) \n clwb 0xe4c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm2, 0x4780(%%r11, %%r10) \n clwb 0x4780(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm3, 0xc240(%%r11, %%r10) \n clwb 0xc240(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm4, 0x2e00(%%r11, %%r10) \n clwb 0x2e00(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm5, 0xf4c0(%%r11, %%r10) \n clwb 0xf4c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm6, 0xe5c0(%%r11, %%r10) \n clwb 0xe5c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm7, 0x7040(%%r11, %%r10) \n clwb 0x7040(%%r11, %%r10) \n" //#define STWB_SFENCE_x16_RAND_AVX512 #define STWB_x16_RAND_AVX512 \ "vmovdqa64 %%zmm0, 0x28c0(%%r11, %%r10) \n clwb 0x28c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm1, 0xc880(%%r11, %%r10) \n clwb 0xc880(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm2, 0x3cc0(%%r11, %%r10) \n clwb 0x3cc0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm3, 0xdd40(%%r11, %%r10) \n clwb 0xdd40(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm4, 0x6bc0(%%r11, %%r10) \n clwb 0x6bc0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm5, 0xe600(%%r11, %%r10) \n clwb 0xe600(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm6, 0x1c0(%%r11, %%r10) \n clwb 0x1c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm7, 0xf540(%%r11, %%r10) \n clwb 0xf540(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm8, 0x11c0(%%r11, %%r10) \n clwb 0x11c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm9, 0xb000(%%r11, %%r10) \n clwb 0xb000(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm10, 0x3f80(%%r11, %%r10) \n clwb 0x3f80(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm11, 0x5c40(%%r11, %%r10) \n clwb 0x5c40(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm12, 0xed00(%%r11, %%r10) \n clwb 0xed00(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm13, 0xd600(%%r11, %%r10) \n clwb 0xd600(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm14, 0x4c80(%%r11, %%r10) \n clwb 0x4c80(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm15, 0xb280(%%r11, %%r10) \n clwb 0xb280(%%r11, %%r10) \n" /* temporal store */ #define STWB_x32_RAND_AVX512 \ "vmovdqa64 %%zmm0, 0x9c0(%%r11, %%r10) \n clwb 0x9c0(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm1, 0x3b40(%%r11, %%r10) \n clwb 0x3b40(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm2, 0xe540(%%r11, %%r10) \n clwb 0xe540(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm3, 0xe180(%%r11, %%r10) \n clwb 0xe180(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm4, 0x2b80(%%r11, %%r10) \n clwb 0x2b80(%%r11, %%r10) \n" \ "vmovdqa64 %%zmm5, 0xa380(%%r11, %%r10) \n clwb 
#define SIZESTWB_1024_AVX512 \
    "vmovdqa64 %%zmm0, 0x0(%%r9, %%r10) \n" \
    "clwb 0x0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x40(%%r9, %%r10) \n" \
    "clwb 0x40(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x80(%%r9, %%r10) \n" \
    "clwb 0x80(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0xc0(%%r9, %%r10) \n" \
    "clwb 0xc0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x100(%%r9, %%r10) \n" \
    "clwb 0x100(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x140(%%r9, %%r10) \n" \
    "clwb 0x140(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x180(%%r9, %%r10) \n" \
    "clwb 0x180(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x1c0(%%r9, %%r10) \n" \
    "clwb 0x1c0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x200(%%r9, %%r10) \n" \
    "clwb 0x200(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x240(%%r9, %%r10) \n" \
    "clwb 0x240(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x280(%%r9, %%r10) \n" \
    "clwb 0x280(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x2c0(%%r9, %%r10) \n" \
    "clwb 0x2c0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x300(%%r9, %%r10) \n" \
    "clwb 0x300(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x340(%%r9, %%r10) \n" \
    "clwb 0x340(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x380(%%r9, %%r10) \n" \
    "clwb 0x380(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x3c0(%%r9, %%r10) \n" \
    "clwb 0x3c0(%%r9, %%r10) \n" \
    "add $0x400, %%r10 \n"

#define SIZEST_1024_AVX512 \
    "vmovdqa64 %%zmm0, 0x0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x40(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x80(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0xc0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x100(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x140(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x180(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x1c0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x200(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x240(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x280(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x2c0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x300(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x340(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x380(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x3c0(%%r9, %%r10) \n" \
    "add $0x400, %%r10 \n"

/* perform movdir64B */
#define SIZEMOV_1024 \
    "movdir64b 0x0(%%r9, %%r10), %%r12 \n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x40(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x80(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0xc0(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x100(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x140(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x180(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x1c0(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x200(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x240(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x280(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x2c0(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x300(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x340(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x380(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "movdir64b 0x3c0(%%r9, %%r10), %%r12\n" \
    "add $0x40, %%r12 \n" \
    "add $0x400, %%r10 \n"

/* Mixed read and write */
/* try using the same dest reg. Assign some value to zmm0 for storing. */
#define SIZE_R1W1_512 \
    "vmovdqa64 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovdqa64 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovdqa64 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "" \
    "vmovq %1, %%xmm0 \n" \
    "vmovdqa64 %%zmm0, 0x0(%%r9, %%r10) \n" \
    "clwb 0x0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x40(%%r9, %%r10) \n" \
    "clwb 0x40(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x80(%%r9, %%r10) \n" \
    "clwb 0x80(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0xc0(%%r9, %%r10) \n" \
    "clwb 0xc0(%%r9, %%r10) \n" \
    "add $0x200, %%r10 \n"

#define SIZE_R2W1_576 \
    "vmovdqa64 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x40(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x80(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0xc0(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x100(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x140(%%r9, %%r10), %%zmm0 \n" \
    "vmovq %1, %%xmm1 \n" \
    "vmovdqa64 %%zmm1, 0x0(%%r9, %%r10) \n" \
    "clwb 0x0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm1, 0x40(%%r9, %%r10) \n" \
    "clwb 0x40(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm1, 0x80(%%r9, %%r10) \n" \
    "clwb 0x80(%%r9, %%r10) \n" \
    "add $0x180, %%r10 \n"

#define SIZE_R2W1_384 \
    "vmovdqa64 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovdqa64 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovdqa64 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "vmovq %1, %%xmm0 \n" \
    "vmovdqa64 %%zmm0, 0x0(%%r9, %%r10) \n" \
    "clwb 0x0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x40(%%r9, %%r10) \n" \
    "clwb 0x40(%%r9, %%r10) \n" \
    "add $0x180, %%r10 \n"

#define SIZE_R3W1_512 \
    "vmovdqa64 0x0(%%r9, %%r10), %%zmm0 \n" \
    "vmovdqa64 0x40(%%r9, %%r10), %%zmm1 \n" \
    "vmovdqa64 0x80(%%r9, %%r10), %%zmm2 \n" \
    "vmovdqa64 0xc0(%%r9, %%r10), %%zmm3 \n" \
    "vmovdqa64 0x100(%%r9, %%r10), %%zmm4 \n" \
    "vmovdqa64 0x140(%%r9, %%r10), %%zmm5 \n" \
    "" \
    "vmovq %1, %%xmm0 \n" \
    "vmovdqa64 %%zmm0, 0x0(%%r9, %%r10) \n" \
    "clwb 0x0(%%r9, %%r10) \n" \
    "vmovdqa64 %%zmm0, 0x40(%%r9, %%r10) \n" \
    "clwb 0x40(%%r9, %%r10) \n" \
    "add $0x200, %%r10 \n"
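/*
 * Illustrative sketch (assumption): the mixed R/W macros reference operand
 * "%1" for the value moved into xmm0/xmm1 before the stores, so the asm
 * operand list must place that value second.  `example_mixed_r1w1` is a
 * hypothetical driver; the real op_mixed() may bind its operands differently.
 * `addr` is assumed 64B-aligned and `size` a multiple of 0x200.
 */
static inline void example_mixed_r1w1(char* addr, long size, uint64_t fill)
{
    asm volatile(
        "mov %0, %%r9 \n"   /* %0: buffer base -> r9 */
        "xor %%r10, %%r10 \n"
        "1: \n"
        SIZE_R1W1_512       /* 4 x 64B loads + 4 x 64B stores/clwb; uses %1; r10 += 0x200 */
        "cmp %2, %%r10 \n"
        "jl 1b \n"
        :
        : "r"(addr), "r"(fill), "r"(size)
        : "r9", "r10", "xmm0", "xmm1", "xmm2", "xmm3", "memory");
}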
/* snippets for latency measuring */

/* Assembly instructions utilize the following registers:
 * rsi: memory address
 * rax, rdx, rcx, r8d and r9d: timing
 * rdx: populating cache-lines
 * ymm0: streaming instructions
 */
#define REGISTERS "rsi", "rax", "rdx", "rcx", "r8", "r9", "ymm0"

/* rdtscp: reads the current timestamp into EDX:EAX and also sets ECX;
 * the higher 32 bits of RAX, RDX and RCX are cleared.
 *
 * r9d = old EDX
 * r8d = old EAX
 * Here is what we do to compute t_start and t_end:
 * - RDX holds t_end
 * - RAX holds t_start
 */
/** Douglas: read this blog for more info about timing
 * http://sites.utexas.edu/jdm4372/2018/07/23/comments-on-timing-short-code-sections-on-intel-processors/
 */
#define TIMING_BEGIN \
    "rdtscp \n" \
    "lfence \n" \
    "mov %%edx, %%r9d \n" \
    "mov %%eax, %%r8d \n"

#define TIMING_END \
    "mfence \n" \
    "rdtscp \n" \
    "lfence \n" \
    "shl $32, %%rdx \n" \
    "or %%rax, %%rdx \n" \
    "mov %%r9d, %%eax \n" \
    "shl $32, %%rax \n" \
    "or %%r8, %%rax \n" \
    "mov %%rax, %[t_start] \n" \
    "mov %%rdx, %[t_end] \n"
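/*
 * Illustrative sketch (assumption; `example_ld_64B_lat_sketch` is a
 * hypothetical name, not one of the op_* routines declared above):
 * TIMING_BEGIN / TIMING_END are meant to bracket the measured instruction
 * inside a single asm statement.  TIMING_END writes the reconstructed start
 * and end TSC values through the named outputs %[t_start] and %[t_end]; the
 * clobber list mirrors the REGISTERS comment above.  `addr` is assumed
 * 64B-aligned.
 */
static inline uint64_t example_ld_64B_lat_sketch(char* addr)
{
    uint64_t t_start = 0, t_end = 0;
    asm volatile(
        TIMING_BEGIN
        "vmovdqa64 0x0(%[mem]), %%zmm0 \n"  /* the 64B load being timed */
        TIMING_END
        : [t_start] "=r"(t_start), [t_end] "=r"(t_end)
        : [mem] "r"(addr)
        : "rax", "rdx", "rcx", "r8", "r9", "xmm0", "memory");
    return t_end - t_start;
}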
/* Flush every cache line of the 64KB block based at r11 (r10 must be 0 on
 * entry and is reset to 0 afterwards).  Note: this defines the global label
 * LOOP_64K_BLOCK_FLUSH, so it can only be expanded once per translation unit. */
#define FLUSH_64K_BLOCK \
    "LOOP_64K_BLOCK_FLUSH: \n" \
    "clflush (%%r11, %%r10) \n" \
    "add $0x40, %%r10 \n" \
    "cmp $0x10000, %%r10 \n" \
    "jl LOOP_64K_BLOCK_FLUSH\n" \
    "xor %%r10, %%r10 \n" \
    "mfence \n"

/* Flush four cache lines at rsi + 0, 64, 128 and 192 bytes. */
#define FLUSH_CACHE_LINE \
    "clflush 0*32(%%rsi) \n" \
    "clflush 2*32(%%rsi) \n" \
    "clflush 4*32(%%rsi) \n" \
    "clflush 6*32(%%rsi) \n" \
    "mfence \n"

/* #define FLUSH_CACHE_LINE "nop \n" */

/* 16 x 6 = 96 nops; CLEAR_PIPELINE_x16 expands to 16 copies of this block. */
#define CLEAR_PIPELINE \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n" \
    "nop \nnop \nnop \nnop \nnop \nnop \n"

#define CLEAR_PIPELINE_x16 \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE \
    CLEAR_PIPELINE

#endif // WORKLOAD_H