123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
- // Copyright 2011 Google Inc. All Rights Reserved.
- //
- // Use of this source code is governed by a BSD-style license
- // that can be found in the COPYING file in the root of the source
- // tree. An additional intellectual property rights grant can be found
- // in the file PATENTS. All contributing project authors may
- // be found in the AUTHORS file in the root of the source tree.
- // -----------------------------------------------------------------------------
- //
- // CPU detection
- //
- // Author: Christian Duvivier (cduvivier@google.com)
- #include "./dsp.h"
- #if defined(WEBP_HAVE_NEON_RTCD)
- #include <stdio.h>
- #include <string.h>
- #endif
- #if defined(WEBP_ANDROID_NEON)
- #include <cpu-features.h>
- #endif
- //------------------------------------------------------------------------------
- // SSE2 detection.
- //
- // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
- #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
- static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "mov %%ebx, %%edi\n"
- "cpuid\n"
- "xchg %%edi, %%ebx\n"
- : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
- : "a"(info_type), "c"(0));
- }
- #elif defined(__x86_64__) && \
- (defined(__code_model_medium__) || defined(__code_model_large__)) && \
- defined(__PIC__)
- static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "xchg{q}\t{%%rbx}, %q1\n"
- "cpuid\n"
- "xchg{q}\t{%%rbx}, %q1\n"
- : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
- "=d"(cpu_info[3])
- : "a"(info_type), "c"(0));
- }
- #elif defined(__i386__) || defined(__x86_64__)
- static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
- __asm__ volatile (
- "cpuid\n"
- : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
- : "a"(info_type), "c"(0));
- }
- #elif (defined(_M_X64) || defined(_M_IX86)) && \
- defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
- #include <intrin.h>
- #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
- #elif defined(WEBP_MSC_SSE2)
- #define GetCPUInfo __cpuid
- #endif
- // NaCl has no support for xgetbv or the raw opcode.
- #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
- static WEBP_INLINE uint64_t xgetbv(void) {
- const uint32_t ecx = 0;
- uint32_t eax, edx;
- // Use the raw opcode for xgetbv for compatibility with older toolchains.
- __asm__ volatile (
- ".byte 0x0f, 0x01, 0xd0\n"
- : "=a"(eax), "=d"(edx) : "c" (ecx));
- return ((uint64_t)edx << 32) | eax;
- }
- #elif (defined(_M_X64) || defined(_M_IX86)) && \
- defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
- #include <immintrin.h>
- #define xgetbv() _xgetbv(0)
- #elif defined(_MSC_VER) && defined(_M_IX86)
- static WEBP_INLINE uint64_t xgetbv(void) {
- uint32_t eax_, edx_;
- __asm {
- xor ecx, ecx // ecx = 0
- // Use the raw opcode for xgetbv for compatibility with older toolchains.
- __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
- mov eax_, eax
- mov edx_, edx
- }
- return ((uint64_t)edx_ << 32) | eax_;
- }
- #else
- #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
- #endif
- #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
- // helper function for run-time detection of slow SSSE3 platforms
- static int CheckSlowModel(int info) {
- // Table listing display models with longer latencies for the bsr instruction
- // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
- // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
- static const uint8_t kSlowModels[] = {
- 0x37, 0x4a, 0x4d, // Silvermont Microarchitecture
- 0x1c, 0x26, 0x27 // Atom Microarchitecture
- };
- const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);
- const uint32_t family = (info >> 8) & 0xf;
- if (family == 0x06) {
- size_t i;
- for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
- if (model == kSlowModels[i]) return 1;
- }
- }
- return 0;
- }
- static int x86CPUInfo(CPUFeature feature) {
- int max_cpuid_value;
- int cpu_info[4];
- int is_intel = 0;
- // get the highest feature value cpuid supports
- GetCPUInfo(cpu_info, 0);
- max_cpuid_value = cpu_info[0];
- if (max_cpuid_value < 1) {
- return 0;
- } else {
- const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG
- const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni
- const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
- is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
- cpu_info[2] == VENDOR_ID_INTEL_ECX &&
- cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
- }
- GetCPUInfo(cpu_info, 1);
- if (feature == kSSE2) {
- return !!(cpu_info[3] & (1 << 26));
- }
- if (feature == kSSE3) {
- return !!(cpu_info[2] & (1 << 0));
- }
- if (feature == kSlowSSSE3) {
- if (is_intel && (cpu_info[2] & (1 << 0))) { // SSSE3?
- return CheckSlowModel(cpu_info[0]);
- }
- return 0;
- }
- if (feature == kSSE4_1) {
- return !!(cpu_info[2] & (1 << 19));
- }
- if (feature == kAVX) {
- // bits 27 (OSXSAVE) & 28 (256-bit AVX)
- if ((cpu_info[2] & 0x18000000) == 0x18000000) {
- // XMM state and YMM state enabled by the OS.
- return (xgetbv() & 0x6) == 0x6;
- }
- }
- if (feature == kAVX2) {
- if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
- GetCPUInfo(cpu_info, 7);
- return !!(cpu_info[1] & (1 << 5));
- }
- }
- return 0;
- }
- VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
- #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
- static int AndroidCPUInfo(CPUFeature feature) {
- const AndroidCpuFamily cpu_family = android_getCpuFamily();
- const uint64_t cpu_features = android_getCpuFeatures();
- if (feature == kNEON) {
- return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
- 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
- }
- return 0;
- }
- VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
- #elif defined(WEBP_USE_NEON)
- // define a dummy function to enable turning off NEON at runtime by setting
- // VP8DecGetCPUInfo = NULL
- static int armCPUInfo(CPUFeature feature) {
- if (feature != kNEON) return 0;
- #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
- {
- int has_neon = 0;
- char line[200];
- FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
- if (cpuinfo == NULL) return 0;
- while (fgets(line, sizeof(line), cpuinfo)) {
- if (!strncmp(line, "Features", 8)) {
- if (strstr(line, " neon ") != NULL) {
- has_neon = 1;
- break;
- }
- }
- }
- fclose(cpuinfo);
- return has_neon;
- }
- #else
- return 1;
- #endif
- }
- VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
- #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
- defined(WEBP_USE_MSA)
- static int mipsCPUInfo(CPUFeature feature) {
- if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
- return 1;
- } else {
- return 0;
- }
- }
- VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
- #else
- VP8CPUInfo VP8GetCPUInfo = NULL;
- #endif
|