caching - How to get L3 cache info (size, line length) on Intel processor using cpuid? -


i encountered trouble during getting l3 cache info on intel processors. getting l3 line length on amd simple, this:

mov eax, 0x80000006  cpuid shl edx, 24 shr edx, 24 

the same operation on intels more complicated. got might done using sequence:

mov eax, 2 cpuid 

and pasring register values manual: http://www.microbe.cz/docs/cpuid.pdf (page 26, "table 2-7. descriptor decode values").

but program not found of enumerated descriptors , returns 0 cache size , line length.

is there simpler and/or sufficient method cache size , line length on intels?

here full code. cpuid output (eax, ebx, ecx, edx) pushed onto stack, each value compared hardcoded descriptors list. comparation made on lower 8 bits, these bits shrinked.

__declspec(dllexport) __declspec(naked) void getmetriclevel2(int &length) {     __asm {         // check cpuid availability         pushfd         pop eax         mov ebx, eax         xor eax, 00200000h         push eax         popfd         pushfd         pop eax         cmp eax, ebx         jnz has_cpuid         mov edx, -1 // return -1 reference         jmp ret_arg has_cpuid:         mov eax, 2 // l3 intel, incomplete         mov ecx, 0         cpuid         push ecx         or ecx, eax         or ecx, ebx         or ecx, edx         cmp ecx, 0         pop ecx // experimental         je cpu_amd // if registers 0, try amd scheme cpu_intel:         push ebp         mov ebp, 0         push 0         push eax // store counter         jmp call_begin cycle_begin:         pop ecx         inc ecx         push ecx         push eax         mov eax, 2         cpuid call_begin:         push eax         push ebx         push ecx         push edx         mov ch, 4 parse_reg:         pop edx         mov cl, 4 parse_descr: dd0h://512,4w         cmp dl, 0xd0         jne dd1h         add ebp, 512d         jmp miss_l3cache dd1h://1024,4w         cmp dl, 0xd1         jne dd2h         add ebp, 1024d         jmp miss_l3cache dd2h://2048,4w         cmp dl, 0xd2         jne dd6h         add ebp, 2048d         jmp miss_l3cache dd6h://1024,8w         cmp dl, 0xd6         jne dd7h         add ebp, 1024d         jmp miss_l3cache dd7h://2048,8w         cmp dl, 0xd7         jne dd8h         add ebp, 2048d         jmp miss_l3cache dd8h://4096,8w         cmp dl, 0xd8         jne ddch         add ebp, 4096d         jmp miss_l3cache ddch://1536,12w         cmp dl, 0xdc         jne dddh         add ebp, 1536d         jmp miss_l3cache dddh://3072,12w         cmp dl, 0xdd         jne ddeh         add ebp, 3072d         jmp miss_l3cache ddeh://6144,12w         cmp dl, 0xde         jne de2h         add ebp, 6144d         jmp miss_l3cache de2h://2048,16w         cmp dl, 0xe2         jne de3h         add ebp, 2048d         jmp miss_l3cache de3h://4096,16w         cmp dl, 0xe3         jne de4h         add ebp, 4096d         jmp miss_l3cache de4h://8192,16w         cmp dl, 0xe4         jne deah         add ebp, 8192d         jmp miss_l3cache deah://12mb,24w         cmp dl, 0xea         jne debh         add ebp, 12288d         jmp miss_l3cache debh://18mb,24w         cmp dl, 0xeb         jne dech         add ebp, 18432d         jmp miss_l3cache dech://24mb,24w         cmp dl, 0xec         jne miss_l3cache         add ebp, 24576d miss_l3cache:         dec cl         cmp cl, 0         shr edx, 8 // it's 8-bit descriptor         jne parse_descr         dec ch         cmp ch, 0         jne parse_reg call_finish:         pop eax         cmp al, 0         je cycle_finish // replace je         dec al         jmp cycle_begin cycle_finish:         mov edx, ebp         shl edx, 8 // 8 bits cache string length         mov dl, 64d // intel has 64 byte l3 string         add esp, 4         pop ebp         jmp ret_arg cpu_amd:         mov eax, 0x80000006 // l3 amd         cpuid         shl edx, 24         shr edx, 24 ret_arg:         mov eax, [esp+4] // first argument lies here         mov [eax], edx // return reference         ret     } } 

there number of problems code. should use __cpuid compiler intrinsic , write in c++. it'll make code easier write , maintain.

there 2 major problems code. first you're not using cpuid function 2 correctly. value in ecx ignored when use function. second you're not using cpuid function 4 determine cache size when function 2 returns 0ffh descriptor.

other problems code include:

  • not ignoring invalid register values returned function 2 high bit set.
  • not handling number of cache descriptors describe l3 caches.
  • your inner loop byte counter isn't used since shr edx, 8 sets flags. loop works anyways because when edx becomes 0 doesn't contain more possible l3 descriptors.

part of problem you're using outdated manual. should use latest intel software developers manual.

it's not tested, it's got transcription errors in cache descriptor switch statement, here's c implementation uses cpuid functions 2 , 4 determine size, associativity , cache line size of l3 cache:

#include <intrin.h>  int get_intel_l3_info(unsigned *size, unsigned *assoc, unsigned *linesize) {     int regs[4];     int i;      __cpuid(regs, 0); /* maximum input value */     int max_leaf = regs[0];     if (max_leaf < 2) {         return -1; /* no way find l3 cache info */     }      __cpuid(regs, 1); /* additional information */     int family = (regs[0] >> 8) & 0xf;     int model = (regs[0] >> 4) & 0xf;      __cpuid(regs, 2); /* cache , tlb information */      regs[0] &= 0xffffff00; /* least significant byte of eax invalid */     (i = 0; < 4; i++) {         if (regs[i] < 0) { /* invalid if significant bit set */             regs[i] = 0;         }     }      unsigned char *descriptors = (unsigned char *) regs;      const int kb = 1024;     const int mb = 1024 * kb;  #define retinfo(s, a, l) *size = (s); *assoc = (a); *linesize = (l); return 0      int use_leaf_4 = 0;     (i = 0; < 32; i++) {         switch(descriptors[i]) {         case 0x22: retinfo(512 * kb, 4, 64);         case 0x23: retinfo(1 * mb, 8, 64);         case 0x25: retinfo(2 * mb, 8, 64);         case 0x29: retinfo(4 * mb, 8, 64);         case 0x40: retinfo(0, 0, 0); /* no l3 cache */         case 0x46: retinfo(4 * mb, 4, 64);         case 0x47: retinfo(8 * mb, 8, 64);         case 0x49:             if (family == 0x0f && model == 0x06) {                 retinfo(4 * mb, 16, 64);             }             break;         case 0x4a: retinfo(6 * mb, 12, 64);         case 0x4b: retinfo(8 * mb, 16, 64);         case 0x4c: retinfo(12 * mb, 12, 64);         case 0x4d: retinfo(16  * mb, 16, 64);         case 0xd0: retinfo(512 * kb, 4, 64);         case 0xd1: retinfo(1 * mb, 4, 64);         case 0xd6: retinfo(1 * mb, 8, 64);         case 0xd7: retinfo(2 * mb, 8, 64);         case 0xd8: retinfo(4 * mb, 8, 64);         case 0xdc: retinfo(1 * mb + 512 * kb, 12, 64);         case 0xdd: retinfo(3 * mb, 12, 64);         case 0xde: retinfo(6 * mb, 12, 64);         case 0xe2: retinfo(2 * mb, 16, 64);         case 0xe3: retinfo(4 * mb, 16, 64);         case 0xe4: retinfo(8 * mb, 16, 64);         case 0xea: retinfo(12 * mb, 24, 64);         case 0xeb: retinfo(18 * mb, 24, 64);         case 0xec: retinfo(24 * mb, 24, 64);          case 0xff:             use_leaf_4 = 1;             break;         }     }      if (!use_leaf_4 || max_leaf < 4) {         return -1; /* failed, no l3 info found */     }      = 0;     while(1) {         __cpuidex(regs, 4, i); /* deterministic cache parameters */         if ((regs[0] & 0x1f) == 0) {             return retinfo(0, 0, 0); /* no l3 cache */         }         if (((regs[0] >> 5) & 0x7) == 3) {             int lsize = (regs[1] & 0xfff) + 1;             int partitions = ((regs[1] >> 12) & 0x3ff) + 1;             int ways = ((regs[1] >> 22) & 0x3ff) + 1;             int sets = regs[2] + 1;             retinfo(ways * partitions * lsize * sets,                 ways, lsize);         }         i++;     } } 

Comments

Popular posts from this blog

html - Styling progress bar with inline style -

java - Oracle Sql developer error: could not install some modules -

How to use autoclose brackets in Jupyter notebook? -