Drop assembly for 24-bit fixed-point operations

The i386 compiler is now smart enough to generate a single imul
instruction per fixed-point multiplication. This change also allows
24-bit precision to be used on all architectures (if -DADATA_BITS=24 is used).

Tested on arm64 by Doug Moss <dougmoss710 at yahoo.com>, thanks

Suggested and ok naddy@
This commit is contained in:
Alexandre Ratchov 2021-05-31 15:08:02 +02:00
parent fde5ad8a68
commit 1f42a21368
2 changed files with 0 additions and 80 deletions

View File

@@ -40,51 +40,11 @@ typedef short adata_t;
#elif ADATA_BITS == 24
#if defined(__i386__) && defined(__GNUC__)
/*
 * Multiply two fixed-point values that carry 23 fractional bits.
 *
 * Both operands are widened to 64 bits so the full product is kept,
 * then the product is shifted right by 23 and narrowed back to int.
 * That is the value the former imull/shrdl pair left in %eax, obtained
 * without the non-ISO `asm' keyword and without a `volatile' asm
 * statement that kept the compiler from optimising a computation that
 * has no side effects.
 *
 * Relies on arithmetic right shift of negative values and on modular
 * narrowing to int; both are documented GCC behaviour and this code is
 * only compiled when __GNUC__ is defined.
 */
static inline int
fp24_mul(int x, int a)
{
	long long prod = (long long)x * (long long)a;

	return (int)(prod >> 23);
}
/*
 * Compute x * a / b using a 64-bit intermediate product.
 *
 * The previous inline assembly passed `a' in %edx as an input-only
 * operand, yet imull and idivl both overwrite %edx and no output or
 * clobber told the compiler so; it was free to assume %edx still held
 * `a' afterwards. Plain C avoids the constraint problem altogether.
 *
 * The quotient is truncated toward zero, as idivl did. `b' must be
 * non-zero and the quotient is expected to fit in an int: idivl trapped
 * when it did not, whereas the narrowing cast here wraps.
 */
static inline int
fp24_muldiv(int x, int a, int b)
{
	long long prod = (long long)x * (long long)a;

	return (int)(prod / (long long)b);
}
/*
 * Fixed-point helpers for this branch, forwarding to fp24_mul() and
 * fp24_muldiv(). Both expand to a plain expression: ADATA_MULDIV used
 * to end in a stray `;', which broke any use inside a larger
 * expression and differed from the other definitions of the macro.
 */
#define ADATA_MUL(x,y)		fp24_mul((x), (y))
#define ADATA_MULDIV(x,y,z)	fp24_muldiv((x), (y), (z))
#elif defined(__amd64__) || defined(__sparc64__)
/*
 * ADATA_MUL: fixed-point product. Both operands are widened to
 * long long, multiplied, shifted right by (ADATA_BITS - 1) and
 * narrowed back to int.
 */
#define ADATA_MUL(x,y)							\
	((int)(((long long)(x) * (long long)(y)) >> (ADATA_BITS - 1)))

/*
 * ADATA_MULDIV: x * y / z, with all three operands widened to
 * long long before the arithmetic and the quotient narrowed to int.
 */
#define ADATA_MULDIV(x,y,z)						\
	((int)((long long)(x) * (long long)(y) / (long long)(z)))
#else
#error "no 24-bit code for this architecture"
#endif
/* Sample type used by the ADATA_BITS == 24 branch: a plain int. */
typedef int adata_t;
#else

View File

@@ -40,51 +40,11 @@ typedef short adata_t;
#elif ADATA_BITS == 24
#if defined(__i386__) && defined(__GNUC__)
/*
 * Multiply two fixed-point values that carry 23 fractional bits.
 *
 * Both operands are widened to 64 bits so the full product is kept,
 * then the product is shifted right by 23 and narrowed back to int.
 * That is the value the former imull/shrdl pair left in %eax, obtained
 * without the non-ISO `asm' keyword and without a `volatile' asm
 * statement that kept the compiler from optimising a computation that
 * has no side effects.
 *
 * Relies on arithmetic right shift of negative values and on modular
 * narrowing to int; both are documented GCC behaviour and this code is
 * only compiled when __GNUC__ is defined.
 */
static inline int
fp24_mul(int x, int a)
{
	long long prod = (long long)x * (long long)a;

	return (int)(prod >> 23);
}
/*
 * Compute x * a / b using a 64-bit intermediate product.
 *
 * The previous inline assembly passed `a' in %edx as an input-only
 * operand, yet imull and idivl both overwrite %edx and no output or
 * clobber told the compiler so; it was free to assume %edx still held
 * `a' afterwards. Plain C avoids the constraint problem altogether.
 *
 * The quotient is truncated toward zero, as idivl did. `b' must be
 * non-zero and the quotient is expected to fit in an int: idivl trapped
 * when it did not, whereas the narrowing cast here wraps.
 */
static inline int
fp24_muldiv(int x, int a, int b)
{
	long long prod = (long long)x * (long long)a;

	return (int)(prod / (long long)b);
}
/*
 * Fixed-point helpers for this branch, forwarding to fp24_mul() and
 * fp24_muldiv(). Both expand to a plain expression: ADATA_MULDIV used
 * to end in a stray `;', which broke any use inside a larger
 * expression and differed from the other definitions of the macro.
 */
#define ADATA_MUL(x,y)		fp24_mul((x), (y))
#define ADATA_MULDIV(x,y,z)	fp24_muldiv((x), (y), (z))
#elif defined(__amd64__) || defined(__sparc64__)
/*
 * ADATA_MUL: fixed-point product. Both operands are widened to
 * long long, multiplied, shifted right by (ADATA_BITS - 1) and
 * narrowed back to int.
 */
#define ADATA_MUL(x,y)							\
	((int)(((long long)(x) * (long long)(y)) >> (ADATA_BITS - 1)))

/*
 * ADATA_MULDIV: x * y / z, with all three operands widened to
 * long long before the arithmetic and the quotient narrowed to int.
 */
#define ADATA_MULDIV(x,y,z)						\
	((int)((long long)(x) * (long long)(y) / (long long)(z)))
#else
#error "no 24-bit code for this architecture"
#endif
/* Sample type used by the ADATA_BITS == 24 branch: a plain int. */
typedef int adata_t;
#else