cpu-features.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. /*
  2. * Copyright (C) 2010 The Android Open Source Project
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * * Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * * Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in
  12. * the documentation and/or other materials provided with the
  13. * distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  18. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  19. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  21. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  22. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  23. * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  25. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. /* ChangeLog for this library:
  29. *
  30. * NDK r8d: Add android_setCpu().
  31. *
  32. * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
  33. * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
  34. *
  35. * Rewrite the code to parse /proc/self/auxv instead of
  36. * the "Features" field in /proc/cpuinfo.
  37. *
 * Dynamically allocate the buffer that holds the content
  39. * of /proc/cpuinfo to deal with newer hardware.
  40. *
  41. * NDK r7c: Fix CPU count computation. The old method only reported the
  42. * number of _active_ CPUs when the library was initialized,
  43. * which could be less than the real total.
  44. *
  45. * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
  46. * for an ARMv6 CPU (see below).
  47. *
  48. * Handle kernels that only report 'neon', and not 'vfpv3'
 * (VFPv3 is mandated by the ARM architecture if Neon is implemented)
  50. *
  51. * Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
  52. *
  53. * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
  54. * android_getCpuFamily().
  55. *
  56. * NDK r4: Initial release
  57. */
  58. #if defined(__le32__)
  59. // When users enter this, we should only provide interface and
  60. // libportable will give the implementations.
  61. #else // !__le32__
#include <sys/system_properties.h>
#include <pthread.h>
#include "cpu-features.h"
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
  69. static pthread_once_t g_once;
  70. static int g_inited;
  71. static AndroidCpuFamily g_cpuFamily;
  72. static uint64_t g_cpuFeatures;
  73. static int g_cpuCount;
  74. #ifdef __arm__
  75. static uint32_t g_cpuIdArm;
  76. #endif
  77. static const int android_cpufeatures_debug = 0;
  78. #ifdef __arm__
  79. # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM
  80. #elif defined __i386__
  81. # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86
  82. #else
  83. # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN
  84. #endif
  85. #define D(...) \
  86. do { \
  87. if (android_cpufeatures_debug) { \
  88. printf(__VA_ARGS__); fflush(stdout); \
  89. } \
  90. } while (0)
#ifdef __i386__
/* Execute the CPUID instruction with EAX set to 'func' and store the
 * resulting EAX, EBX, ECX and EDX register values into values[0],
 * values[1], values[2] and values[3] respectively.
 *
 * NOTE(review): no value is loaded into ECX before the instruction, so
 * this helper looks unsuitable for CPUID leaves that take a sub-leaf
 * index in ECX -- confirm before using it for anything but leaves 0/1.
 */
static __inline__ void x86_cpuid(int func, int values[4])
{
    int a, b, c, d;
    /* We need to preserve ebx since we're compiling PIC code */
    /* this means we can't use "=b" for the second output register */
    /* EBX is saved on the stack, copied into operand %1 after CPUID,
     * then restored. */
    __asm__ __volatile__ ( \
      "push %%ebx\n"
      "cpuid\n" \
      "mov %%ebx, %1\n"
      "pop %%ebx\n"
      : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
      : "a" (func) \
    );
    values[0] = a;
    values[1] = b;
    values[2] = c;
    values[3] = d;
}
#endif
  111. /* Get the size of a file by reading it until the end. This is needed
  112. * because files under /proc do not always return a valid size when
  113. * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
  114. */
  115. static int
  116. get_file_size(const char* pathname)
  117. {
  118. int fd, result = 0;
  119. char buffer[256];
  120. fd = open(pathname, O_RDONLY);
  121. if (fd < 0) {
  122. D("Can't open %s: %s\n", pathname, strerror(errno));
  123. return -1;
  124. }
  125. for (;;) {
  126. int ret = read(fd, buffer, sizeof buffer);
  127. if (ret < 0) {
  128. if (errno == EINTR)
  129. continue;
  130. D("Error while reading %s: %s\n", pathname, strerror(errno));
  131. break;
  132. }
  133. if (ret == 0)
  134. break;
  135. result += ret;
  136. }
  137. close(fd);
  138. return result;
  139. }
  140. /* Read the content of /proc/cpuinfo into a user-provided buffer.
  141. * Return the length of the data, or -1 on error. Does *not*
  142. * zero-terminate the content. Will not read more
  143. * than 'buffsize' bytes.
  144. */
  145. static int
  146. read_file(const char* pathname, char* buffer, size_t buffsize)
  147. {
  148. int fd, count;
  149. fd = open(pathname, O_RDONLY);
  150. if (fd < 0) {
  151. D("Could not open %s: %s\n", pathname, strerror(errno));
  152. return -1;
  153. }
  154. count = 0;
  155. while (count < (int)buffsize) {
  156. int ret = read(fd, buffer + count, buffsize - count);
  157. if (ret < 0) {
  158. if (errno == EINTR)
  159. continue;
  160. D("Error while reading from %s: %s\n", pathname, strerror(errno));
  161. if (count == 0)
  162. count = -1;
  163. break;
  164. }
  165. if (ret == 0)
  166. break;
  167. count += ret;
  168. }
  169. close(fd);
  170. return count;
  171. }
  172. /* Extract the content of a the first occurence of a given field in
  173. * the content of /proc/cpuinfo and return it as a heap-allocated
  174. * string that must be freed by the caller.
  175. *
  176. * Return NULL if not found
  177. */
  178. static char*
  179. extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
  180. {
  181. int fieldlen = strlen(field);
  182. const char* bufend = buffer + buflen;
  183. char* result = NULL;
  184. int len, ignore;
  185. const char *p, *q;
  186. /* Look for first field occurence, and ensures it starts the line. */
  187. p = buffer;
  188. for (;;) {
  189. p = memmem(p, bufend-p, field, fieldlen);
  190. if (p == NULL)
  191. goto EXIT;
  192. if (p == buffer || p[-1] == '\n')
  193. break;
  194. p += fieldlen;
  195. }
  196. /* Skip to the first column followed by a space */
  197. p += fieldlen;
  198. p = memchr(p, ':', bufend-p);
  199. if (p == NULL || p[1] != ' ')
  200. goto EXIT;
  201. /* Find the end of the line */
  202. p += 2;
  203. q = memchr(p, '\n', bufend-p);
  204. if (q == NULL)
  205. q = bufend;
  206. /* Copy the line into a heap-allocated buffer */
  207. len = q-p;
  208. result = malloc(len+1);
  209. if (result == NULL)
  210. goto EXIT;
  211. memcpy(result, p, len);
  212. result[len] = '\0';
  213. EXIT:
  214. return result;
  215. }
  216. /* Checks that a space-separated list of items contains one given 'item'.
  217. * Returns 1 if found, 0 otherwise.
  218. */
  219. static int
  220. has_list_item(const char* list, const char* item)
  221. {
  222. const char* p = list;
  223. int itemlen = strlen(item);
  224. if (list == NULL)
  225. return 0;
  226. while (*p) {
  227. const char* q;
  228. /* skip spaces */
  229. while (*p == ' ' || *p == '\t')
  230. p++;
  231. /* find end of current list item */
  232. q = p;
  233. while (*q && *q != ' ' && *q != '\t')
  234. q++;
  235. if (itemlen == q-p && !memcmp(p, item, itemlen))
  236. return 1;
  237. /* skip to next item */
  238. p = q;
  239. }
  240. return 0;
  241. }
  242. /* Parse a number starting from 'input', but not going further
  243. * than 'limit'. Return the value into '*result'.
  244. *
  245. * NOTE: Does not skip over leading spaces, or deal with sign characters.
  246. * NOTE: Ignores overflows.
  247. *
  248. * The function returns NULL in case of error (bad format), or the new
  249. * position after the decimal number in case of success (which will always
  250. * be <= 'limit').
  251. */
  252. static const char*
  253. parse_number(const char* input, const char* limit, int base, int* result)
  254. {
  255. const char* p = input;
  256. int val = 0;
  257. while (p < limit) {
  258. int d = (*p - '0');
  259. if ((unsigned)d >= 10U) {
  260. d = (*p - 'a');
  261. if ((unsigned)d >= 6U)
  262. d = (*p - 'A');
  263. if ((unsigned)d >= 6U)
  264. break;
  265. d += 10;
  266. }
  267. if (d >= base)
  268. break;
  269. val = val*base + d;
  270. p++;
  271. }
  272. if (p == input)
  273. return NULL;
  274. *result = val;
  275. return p;
  276. }
  277. static const char*
  278. parse_decimal(const char* input, const char* limit, int* result)
  279. {
  280. return parse_number(input, limit, 10, result);
  281. }
  282. static const char*
  283. parse_hexadecimal(const char* input, const char* limit, int* result)
  284. {
  285. return parse_number(input, limit, 16, result);
  286. }
  287. /* This small data type is used to represent a CPU list / mask, as read
  288. * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt
  289. *
  290. * For now, we don't expect more than 32 cores on mobile devices, so keep
  291. * everything simple.
  292. */
  293. typedef struct {
  294. uint32_t mask;
  295. } CpuList;
  296. static __inline__ void
  297. cpulist_init(CpuList* list) {
  298. list->mask = 0;
  299. }
  300. static __inline__ void
  301. cpulist_and(CpuList* list1, CpuList* list2) {
  302. list1->mask &= list2->mask;
  303. }
  304. static __inline__ void
  305. cpulist_set(CpuList* list, int index) {
  306. if ((unsigned)index < 32) {
  307. list->mask |= (uint32_t)(1U << index);
  308. }
  309. }
  310. static __inline__ int
  311. cpulist_count(CpuList* list) {
  312. return __builtin_popcount(list->mask);
  313. }
  314. /* Parse a textual list of cpus and store the result inside a CpuList object.
  315. * Input format is the following:
  316. * - comma-separated list of items (no spaces)
  317. * - each item is either a single decimal number (cpu index), or a range made
  318. * of two numbers separated by a single dash (-). Ranges are inclusive.
  319. *
  320. * Examples: 0
  321. * 2,4-127,128-143
  322. * 0-1
  323. */
  324. static void
  325. cpulist_parse(CpuList* list, const char* line, int line_len)
  326. {
  327. const char* p = line;
  328. const char* end = p + line_len;
  329. const char* q;
  330. /* NOTE: the input line coming from sysfs typically contains a
  331. * trailing newline, so take care of it in the code below
  332. */
  333. while (p < end && *p != '\n')
  334. {
  335. int val, start_value, end_value;
  336. /* Find the end of current item, and put it into 'q' */
  337. q = memchr(p, ',', end-p);
  338. if (q == NULL) {
  339. q = end;
  340. }
  341. /* Get first value */
  342. p = parse_decimal(p, q, &start_value);
  343. if (p == NULL)
  344. goto BAD_FORMAT;
  345. end_value = start_value;
  346. /* If we're not at the end of the item, expect a dash and
  347. * and integer; extract end value.
  348. */
  349. if (p < q && *p == '-') {
  350. p = parse_decimal(p+1, q, &end_value);
  351. if (p == NULL)
  352. goto BAD_FORMAT;
  353. }
  354. /* Set bits CPU list bits */
  355. for (val = start_value; val <= end_value; val++) {
  356. cpulist_set(list, val);
  357. }
  358. /* Jump to next item */
  359. p = q;
  360. if (p < end)
  361. p++;
  362. }
  363. BAD_FORMAT:
  364. ;
  365. }
  366. /* Read a CPU list from one sysfs file */
  367. static void
  368. cpulist_read_from(CpuList* list, const char* filename)
  369. {
  370. char file[64];
  371. int filelen;
  372. cpulist_init(list);
  373. filelen = read_file(filename, file, sizeof file);
  374. if (filelen < 0) {
  375. D("Could not read %s: %s\n", filename, strerror(errno));
  376. return;
  377. }
  378. cpulist_parse(list, file, filelen);
  379. }
// See <asm/hwcap.h> kernel header.
// Bits of the ARM hardware-capability word used by this file. The quoted
// name next to each bit is the /proc/cpuinfo "Features" item that this
// file maps to it.
#define HWCAP_VFP (1 << 6)        // "vfp"
#define HWCAP_IWMMXT (1 << 9)     // "iwmmxt"
#define HWCAP_NEON (1 << 12)      // "neon"
#define HWCAP_VFPv3 (1 << 13)     // "vfpv3"
#define HWCAP_VFPv3D16 (1 << 14)  // "vfpv3d16"
#define HWCAP_VFPv4 (1 << 16)     // "vfpv4"
#define HWCAP_IDIVA (1 << 17)     // "idiva" (also set for "idiv")
#define HWCAP_IDIVT (1 << 18)     // "idivt" (also set for "idiv")
// ELF auxiliary-vector tag whose value is the hardware-capability word.
#define AT_HWCAP 16
  390. #if defined(__arm__)
  391. /* Compute the ELF HWCAP flags.
  392. */
  393. static uint32_t
  394. get_elf_hwcap(const char* cpuinfo, int cpuinfo_len)
  395. {
  396. /* IMPORTANT:
  397. * Accessing /proc/self/auxv doesn't work anymore on all
  398. * platform versions. More specifically, when running inside
  399. * a regular application process, most of /proc/self/ will be
  400. * non-readable, including /proc/self/auxv. This doesn't
  401. * happen however if the application is debuggable, or when
  402. * running under the "shell" UID, which is why this was not
  403. * detected appropriately.
  404. */
  405. #if 0
  406. uint32_t result = 0;
  407. const char filepath[] = "/proc/self/auxv";
  408. int fd = open(filepath, O_RDONLY);
  409. if (fd < 0) {
  410. D("Could not open %s: %s\n", filepath, strerror(errno));
  411. return 0;
  412. }
  413. struct { uint32_t tag; uint32_t value; } entry;
  414. for (;;) {
  415. int ret = read(fd, (char*)&entry, sizeof entry);
  416. if (ret < 0) {
  417. if (errno == EINTR)
  418. continue;
  419. D("Error while reading %s: %s\n", filepath, strerror(errno));
  420. break;
  421. }
  422. // Detect end of list.
  423. if (ret == 0 || (entry.tag == 0 && entry.value == 0))
  424. break;
  425. if (entry.tag == AT_HWCAP) {
  426. result = entry.value;
  427. break;
  428. }
  429. }
  430. close(fd);
  431. return result;
  432. #else
  433. // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag.
  434. uint32_t hwcaps = 0;
  435. char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
  436. if (cpuFeatures != NULL) {
  437. D("Found cpuFeatures = '%s'\n", cpuFeatures);
  438. if (has_list_item(cpuFeatures, "vfp"))
  439. hwcaps |= HWCAP_VFP;
  440. if (has_list_item(cpuFeatures, "vfpv3"))
  441. hwcaps |= HWCAP_VFPv3;
  442. if (has_list_item(cpuFeatures, "vfpv3d16"))
  443. hwcaps |= HWCAP_VFPv3D16;
  444. if (has_list_item(cpuFeatures, "vfpv4"))
  445. hwcaps |= HWCAP_VFPv4;
  446. if (has_list_item(cpuFeatures, "neon"))
  447. hwcaps |= HWCAP_NEON;
  448. if (has_list_item(cpuFeatures, "idiva"))
  449. hwcaps |= HWCAP_IDIVA;
  450. if (has_list_item(cpuFeatures, "idivt"))
  451. hwcaps |= HWCAP_IDIVT;
  452. if (has_list_item(cpuFeatures, "idiv"))
  453. hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
  454. if (has_list_item(cpuFeatures, "iwmmxt"))
  455. hwcaps |= HWCAP_IWMMXT;
  456. free(cpuFeatures);
  457. }
  458. return hwcaps;
  459. #endif
  460. }
  461. #endif /* __arm__ */
  462. /* Return the number of cpus present on a given device.
  463. *
  464. * To handle all weird kernel configurations, we need to compute the
  465. * intersection of the 'present' and 'possible' CPU lists and count
  466. * the result.
  467. */
  468. static int
  469. get_cpu_count(void)
  470. {
  471. CpuList cpus_present[1];
  472. CpuList cpus_possible[1];
  473. cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");
  474. cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");
  475. /* Compute the intersection of both sets to get the actual number of
  476. * CPU cores that can be used on this device by the kernel.
  477. */
  478. cpulist_and(cpus_present, cpus_possible);
  479. return cpulist_count(cpus_present);
  480. }
  481. static void
  482. android_cpuInitFamily(void)
  483. {
  484. #if defined(__arm__)
  485. g_cpuFamily = ANDROID_CPU_FAMILY_ARM;
  486. #elif defined(__i386__)
  487. g_cpuFamily = ANDROID_CPU_FAMILY_X86;
  488. #elif defined(__mips64)
  489. /* Needs to be before __mips__ since the compiler defines both */
  490. g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;
  491. #elif defined(__mips__)
  492. g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;
  493. #elif defined(__aarch64__)
  494. g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;
  495. #elif defined(__x86_64__)
  496. g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;
  497. #else
  498. g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
  499. #endif
  500. }
  501. static void
  502. android_cpuInit(void)
  503. {
  504. char* cpuinfo = NULL;
  505. int cpuinfo_len;
  506. android_cpuInitFamily();
  507. g_cpuFeatures = 0;
  508. g_cpuCount = 1;
  509. g_inited = 1;
  510. cpuinfo_len = get_file_size("/proc/cpuinfo");
  511. if (cpuinfo_len < 0) {
  512. D("cpuinfo_len cannot be computed!");
  513. return;
  514. }
  515. cpuinfo = malloc(cpuinfo_len);
  516. if (cpuinfo == NULL) {
  517. D("cpuinfo buffer could not be allocated");
  518. return;
  519. }
  520. cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
  521. D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,
  522. cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);
  523. if (cpuinfo_len < 0) /* should not happen */ {
  524. free(cpuinfo);
  525. return;
  526. }
  527. /* Count the CPU cores, the value may be 0 for single-core CPUs */
  528. g_cpuCount = get_cpu_count();
  529. if (g_cpuCount == 0) {
  530. g_cpuCount = 1;
  531. }
  532. D("found cpuCount = %d\n", g_cpuCount);
  533. #ifdef __arm__
  534. {
  535. char* features = NULL;
  536. char* architecture = NULL;
  537. /* Extract architecture from the "CPU Architecture" field.
  538. * The list is well-known, unlike the the output of
  539. * the 'Processor' field which can vary greatly.
  540. *
  541. * See the definition of the 'proc_arch' array in
  542. * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
  543. * same file.
  544. */
  545. char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
  546. if (cpuArch != NULL) {
  547. char* end;
  548. long archNumber;
  549. int hasARMv7 = 0;
  550. D("found cpuArch = '%s'\n", cpuArch);
  551. /* read the initial decimal number, ignore the rest */
  552. archNumber = strtol(cpuArch, &end, 10);
  553. /* Here we assume that ARMv8 will be upwards compatible with v7
  554. * in the future. Unfortunately, there is no 'Features' field to
  555. * indicate that Thumb-2 is supported.
  556. */
  557. if (end > cpuArch && archNumber >= 7) {
  558. hasARMv7 = 1;
  559. }
  560. /* Unfortunately, it seems that certain ARMv6-based CPUs
  561. * report an incorrect architecture number of 7!
  562. *
  563. * See http://code.google.com/p/android/issues/detail?id=10812
  564. *
  565. * We try to correct this by looking at the 'elf_format'
  566. * field reported by the 'Processor' field, which is of the
  567. * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
  568. * an ARMv6-one.
  569. */
  570. if (hasARMv7) {
  571. char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,
  572. "Processor");
  573. if (cpuProc != NULL) {
  574. D("found cpuProc = '%s'\n", cpuProc);
  575. if (has_list_item(cpuProc, "(v6l)")) {
  576. D("CPU processor and architecture mismatch!!\n");
  577. hasARMv7 = 0;
  578. }
  579. free(cpuProc);
  580. }
  581. }
  582. if (hasARMv7) {
  583. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;
  584. }
  585. /* The LDREX / STREX instructions are available from ARMv6 */
  586. if (archNumber >= 6) {
  587. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;
  588. }
  589. free(cpuArch);
  590. }
  591. /* Extract the list of CPU features from ELF hwcaps */
  592. uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len);
  593. if (hwcaps != 0) {
  594. int has_vfp = (hwcaps & HWCAP_VFP);
  595. int has_vfpv3 = (hwcaps & HWCAP_VFPv3);
  596. int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);
  597. int has_vfpv4 = (hwcaps & HWCAP_VFPv4);
  598. int has_neon = (hwcaps & HWCAP_NEON);
  599. int has_idiva = (hwcaps & HWCAP_IDIVA);
  600. int has_idivt = (hwcaps & HWCAP_IDIVT);
  601. int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);
  602. // The kernel does a poor job at ensuring consistency when
  603. // describing CPU features. So lots of guessing is needed.
  604. // 'vfpv4' implies VFPv3|VFP_FMA|FP16
  605. if (has_vfpv4)
  606. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
  607. ANDROID_CPU_ARM_FEATURE_VFP_FP16 |
  608. ANDROID_CPU_ARM_FEATURE_VFP_FMA;
  609. // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,
  610. // a value of 'vfpv3' doesn't necessarily mean that the D32
  611. // feature is present, so be conservative. All CPUs in the
  612. // field that support D32 also support NEON, so this should
  613. // not be a problem in practice.
  614. if (has_vfpv3 || has_vfpv3d16)
  615. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
  616. // 'vfp' is super ambiguous. Depending on the kernel, it can
  617. // either mean VFPv2 or VFPv3. Make it depend on ARMv7.
  618. if (has_vfp) {
  619. if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)
  620. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
  621. else
  622. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;
  623. }
  624. // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA
  625. if (has_neon) {
  626. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
  627. ANDROID_CPU_ARM_FEATURE_NEON |
  628. ANDROID_CPU_ARM_FEATURE_VFP_D32;
  629. if (has_vfpv4)
  630. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;
  631. }
  632. // VFPv3 implies VFPv2 and ARMv7
  633. if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)
  634. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |
  635. ANDROID_CPU_ARM_FEATURE_ARMv7;
  636. if (has_idiva)
  637. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
  638. if (has_idivt)
  639. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;
  640. if (has_iwmmxt)
  641. g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;
  642. }
  643. /* Extract the cpuid value from various fields */
  644. // The CPUID value is broken up in several entries in /proc/cpuinfo.
  645. // This table is used to rebuild it from the entries.
  646. static const struct CpuIdEntry {
  647. const char* field;
  648. char format;
  649. char bit_lshift;
  650. char bit_length;
  651. } cpu_id_entries[] = {
  652. { "CPU implementer", 'x', 24, 8 },
  653. { "CPU variant", 'x', 20, 4 },
  654. { "CPU part", 'x', 4, 12 },
  655. { "CPU revision", 'd', 0, 4 },
  656. };
  657. size_t i;
  658. D("Parsing /proc/cpuinfo to recover CPUID\n");
  659. for (i = 0;
  660. i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);
  661. ++i) {
  662. const struct CpuIdEntry* entry = &cpu_id_entries[i];
  663. char* value = extract_cpuinfo_field(cpuinfo,
  664. cpuinfo_len,
  665. entry->field);
  666. if (value == NULL)
  667. continue;
  668. D("field=%s value='%s'\n", entry->field, value);
  669. char* value_end = value + strlen(value);
  670. int val = 0;
  671. const char* start = value;
  672. const char* p;
  673. if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {
  674. start += 2;
  675. p = parse_hexadecimal(start, value_end, &val);
  676. } else if (entry->format == 'x')
  677. p = parse_hexadecimal(value, value_end, &val);
  678. else
  679. p = parse_decimal(value, value_end, &val);
  680. if (p > (const char*)start) {
  681. val &= ((1 << entry->bit_length)-1);
  682. val <<= entry->bit_lshift;
  683. g_cpuIdArm |= (uint32_t) val;
  684. }
  685. free(value);
  686. }
  687. // Handle kernel configuration bugs that prevent the correct
  688. // reporting of CPU features.
  689. static const struct CpuFix {
  690. uint32_t cpuid;
  691. uint64_t or_flags;
  692. } cpu_fixes[] = {
  693. /* The Nexus 4 (Qualcomm Krait) kernel configuration
  694. * forgets to report IDIV support. */
  695. { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
  696. ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
  697. { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
  698. ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
  699. };
  700. size_t n;
  701. for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {
  702. const struct CpuFix* entry = &cpu_fixes[n];
  703. if (g_cpuIdArm == entry->cpuid)
  704. g_cpuFeatures |= entry->or_flags;
  705. }
  706. }
  707. #endif /* __arm__ */
  708. #ifdef __i386__
  709. int regs[4];
  710. /* According to http://en.wikipedia.org/wiki/CPUID */
  711. #define VENDOR_INTEL_b 0x756e6547
  712. #define VENDOR_INTEL_c 0x6c65746e
  713. #define VENDOR_INTEL_d 0x49656e69
  714. x86_cpuid(0, regs);
  715. int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&
  716. regs[2] == VENDOR_INTEL_c &&
  717. regs[3] == VENDOR_INTEL_d);
  718. x86_cpuid(1, regs);
  719. if ((regs[2] & (1 << 9)) != 0) {
  720. g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;
  721. }
  722. if ((regs[2] & (1 << 23)) != 0) {
  723. g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;
  724. }
  725. if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {
  726. g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;
  727. }
  728. #endif
  729. free(cpuinfo);
  730. }
  731. AndroidCpuFamily
  732. android_getCpuFamily(void)
  733. {
  734. pthread_once(&g_once, android_cpuInit);
  735. return g_cpuFamily;
  736. }
  737. uint64_t
  738. android_getCpuFeatures(void)
  739. {
  740. pthread_once(&g_once, android_cpuInit);
  741. return g_cpuFeatures;
  742. }
  743. int
  744. android_getCpuCount(void)
  745. {
  746. pthread_once(&g_once, android_cpuInit);
  747. return g_cpuCount;
  748. }
  749. static void
  750. android_cpuInitDummy(void)
  751. {
  752. g_inited = 1;
  753. }
  754. int
  755. android_setCpu(int cpu_count, uint64_t cpu_features)
  756. {
  757. /* Fail if the library was already initialized. */
  758. if (g_inited)
  759. return 0;
  760. android_cpuInitFamily();
  761. g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);
  762. g_cpuFeatures = cpu_features;
  763. pthread_once(&g_once, android_cpuInitDummy);
  764. return 1;
  765. }
  766. #ifdef __arm__
  767. uint32_t
  768. android_getCpuIdArm(void)
  769. {
  770. pthread_once(&g_once, android_cpuInit);
  771. return g_cpuIdArm;
  772. }
  773. int
  774. android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
  775. {
  776. if (!android_setCpu(cpu_count, cpu_features))
  777. return 0;
  778. g_cpuIdArm = cpu_id;
  779. return 1;
  780. }
  781. #endif /* __arm__ */
  782. /*
  783. * Technical note: Making sense of ARM's FPU architecture versions.
  784. *
  785. * FPA was ARM's first attempt at an FPU architecture. There is no Android
  786. * device that actually uses it since this technology was already obsolete
  787. * when the project started. If you see references to FPA instructions
  788. * somewhere, you can be sure that this doesn't apply to Android at all.
  789. *
  790. * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
  791. * new versions / additions to it. ARM considers this obsolete right now,
  792. * and no known Android device implements it either.
  793. *
  794. * VFPv2 added a few instructions to VFPv1, and is an *optional* extension
  795. * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
  796. * supporting the 'armeabi' ABI doesn't necessarily support these.
  797. *
  798. * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
  799. * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated
  800. * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
  801. * that it provides 16 double-precision FPU registers (d0-d15) and 32
  802. * single-precision ones (s0-s31) which happen to be mapped to the same
  803. * register banks.
  804. *
  805. * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
  806. * additional double precision registers (d16-d31). Note that there are
  807. * still only 32 single precision registers.
  808. *
  809. * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
  810. * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
  811. * are not supported by Android. Note that it is not compatible with VFPv2.
  812. *
 * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
  814. * depending on context. For example GCC uses it for VFPv3-D32, but
  815. * the Linux kernel code uses it for VFPv3-D16 (especially in
  816. * /proc/cpuinfo). Always try to use the full designation when
  817. * possible.
  818. *
  819. * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
  820. * instructions to perform parallel computations on vectors of 8, 16,
  821. * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
  822. * NEON registers are also mapped to the same register banks.
  823. *
  824. * VFPv4-D16 adds a few instructions on top of VFPv3-D16 in order to
  825. * perform fused multiply-accumulate on VFP registers, as well as
  826. * half-precision (16-bit) conversion operations.
  827. *
  828. * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
  829. * registers.
  830. *
  831. * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
  832. * multiply-accumulate instructions that work on the NEON registers.
  833. *
  834. * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
  835. * depending on context.
  836. *
  837. * The following information was determined by scanning the binutils-2.22
  838. * sources:
  839. *
  840. * Basic VFP instruction subsets:
  841. *
  842. * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set.
  843. * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns.
  844. * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1.
  845. * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision.
  846. * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision.
  847. * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns.
  848. * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31.
  849. * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions.
  850. * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add
  851. * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add
  852. *
  853. * FPU types (excluding NEON)
  854. *
  855. * FPU_VFP_V1xD (EXT_V1xD)
  856. * |
  857. * +--------------------------+
  858. * | |
  859. * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
  860. * | |
  861. * | |
  862. * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
  863. * |
  864. * FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
  865. * |
  866. * +--------------------------+
  867. * | |
  868. * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
  869. * | |
  870. * | FPU_VFP_V4 (+EXT_D32)
  871. * |
  872. * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
  873. *
  874. * VFP architectures:
  875. *
  876. * ARCH_VFP_V1xD (EXT_V1xD)
  877. * |
  878. * +------------------+
  879. * | |
  880. * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
  881. * | |
  882. * | ARCH_VFP_V3xD_FP16 (+EXT_FP16)
  883. * | |
  884. * | ARCH_VFP_V4_SP_D16 (+EXT_FMA)
  885. * |
  886. * ARCH_VFP_V1 (+EXT_V1)
  887. * |
  888. * ARCH_VFP_V2 (+EXT_V2)
  889. * |
  890. * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
  891. * |
  892. * +-------------------+
  893. * | |
  894. * | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
  895. * |
  896. * +-------------------+
  897. * | |
  898. * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
  899. * | |
  900. * | ARCH_VFP_V4 (+EXT_D32)
  901. * | |
  902. * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
  903. * |
  904. * ARCH_VFP_V3 (+EXT_D32)
  905. * |
  906. * +-------------------+
  907. * | |
  908. * | ARCH_VFP_V3_FP16 (+EXT_FP16)
  909. * |
  910. * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
  911. * |
  912. * ARCH_NEON_FP16 (+EXT_FP16)
  913. *
  914. * -mfpu=<name> values and their correspondence with FPU architectures above:
  915. *
  916. * {"vfp", FPU_ARCH_VFP_V2},
  917. * {"vfp9", FPU_ARCH_VFP_V2},
  918. * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatibility.
  919. * {"vfp10", FPU_ARCH_VFP_V2},
  920. * {"vfp10-r0", FPU_ARCH_VFP_V1},
  921. * {"vfpxd", FPU_ARCH_VFP_V1xD},
  922. * {"vfpv2", FPU_ARCH_VFP_V2},
  923. * {"vfpv3", FPU_ARCH_VFP_V3},
  924. * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16},
  925. * {"vfpv3-d16", FPU_ARCH_VFP_V3D16},
  926. * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16},
  927. * {"vfpv3xd", FPU_ARCH_VFP_V3xD},
  928. * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16},
  929. * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1},
  930. * {"neon-fp16", FPU_ARCH_NEON_FP16},
  931. * {"vfpv4", FPU_ARCH_VFP_V4},
  932. * {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
  933. * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16},
  934. * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
  935. *
  936. *
  937. * Simplified diagram that only includes FPUs supported by Android:
  938. * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
  939. * all others are optional and must be probed at runtime.
  940. *
  941. * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
  942. * |
  943. * +-------------------+
  944. * | |
  945. * | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
  946. * |
  947. * +-------------------+
  948. * | |
  949. * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
  950. * | |
  951. * | ARCH_VFP_V4 (+EXT_D32)
  952. * | |
  953. * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
  954. * |
  955. * ARCH_VFP_V3 (+EXT_D32)
  956. * |
  957. * +-------------------+
  958. * | |
  959. * | ARCH_VFP_V3_FP16 (+EXT_FP16)
  960. * |
  961. * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
  962. * |
  963. * ARCH_NEON_FP16 (+EXT_FP16)
  964. *
  965. */
  966. #endif // defined(__le32__)