7fc03e0084
64-by-32-bit divisions are prominent in NuttX, even on 32-bit machines. Luckily, many of them use a constant divisor that allows for a much faster multiplication by the divisor's reciprocal. The compiler already performs this optimization when compiling a 32-by-32 division with a constant divisor. Unfortunately, on 32-bit machines, gcc does not optimize 64-by-32 divisions in that case, except for constant divisors that happen to be a power of 2. Let's avoid the slow path whenever the divisor is constant by manually computing the reciprocal ourselves and performing the multiplication inline. In most cases, this improves performance of 64-by-32 divisions by about two orders of magnitude compared to the __div64_32() fallback, especially on architectures lacking a native div instruction. Signed-off-by: ligd <liguiding1@xiaomi.com> |
||
---|---|---|
.. | ||
android | ||
arpa | ||
crypto | ||
cxx | ||
net | ||
netinet | ||
netpacket | ||
nuttx | ||
ssp | ||
sys | ||
.gitignore | ||
aio.h | ||
alloca.h | ||
assert.h | ||
byteswap.h | ||
ctype.h | ||
debug.h | ||
dirent.h | ||
dlfcn.h | ||
dsp.h | ||
dspb16.h | ||
elf32.h | ||
elf64.h | ||
elf.h | ||
endian.h | ||
err.h | ||
errno.h | ||
execinfo.h | ||
fcntl.h | ||
fixedmath.h | ||
fnmatch.h | ||
ftw.h | ||
getopt.h | ||
glob.h | ||
grp.h | ||
hex2bin.h | ||
iconv.h | ||
ifaddrs.h | ||
inttypes.h | ||
iso646.h | ||
langinfo.h | ||
libgen.h | ||
libintl.h | ||
limits.h | ||
locale.h | ||
lzf.h | ||
malloc.h | ||
mqueue.h | ||
netdb.h | ||
nl_types.h | ||
nxflat.h | ||
obstack.h | ||
poll.h | ||
pthread.h | ||
pty.h | ||
pwd.h | ||
regex.h | ||
resolv.h | ||
sched.h | ||
search.h | ||
semaphore.h | ||
signal.h | ||
spawn.h | ||
stdbool.h | ||
stddef.h | ||
stdint.h | ||
stdio.h | ||
stdlib.h | ||
stdnoreturn.h | ||
string.h | ||
strings.h | ||
syscall.h | ||
syslog.h | ||
termios.h | ||
threads.h | ||
time.h | ||
unistd.h | ||
utime.h | ||
uuid.h | ||
wchar.h | ||
wctype.h |