Commit 7b17d708, authored by Rémi Bernon, committed by Alexandre Julliard

msvcrt: Improve memset performance using overlapping stores.

For n larger than 16 we store 16 bytes on each end of the buffer, eventually overlapping, and then 16 additional bytes for n > 32. Then we can find a 32-byte aligned range overlapping the remaining part of the destination buffer, which is filled 32 bytes at a time in a loop.

Signed-off-by: Rémi Bernon <rbernon@codeweavers.com>
Signed-off-by: Piotr Caban <piotr@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
parent e87f3dd5
...@@ -2855,13 +2855,67 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n) ...@@ -2855,13 +2855,67 @@ void * __cdecl memcpy(void *dst, const void *src, size_t n)
return memmove(dst, src, n); return memmove(dst, src, n);
} }
/* Fill whole 32-byte chunks of d with the 64-bit pattern v, starting with
 * the chunk that ends at d + n and moving towards d.  Only complete chunks
 * are written: when n is not a multiple of 32 the lowest n % 32 bytes are
 * left untouched, and nothing is written at all for n < 32.
 * Stores are plain uint64_t writes, so each chunk address has to be
 * suitably aligned for uint64_t. */
static inline void memset_aligned_32(unsigned char *d, uint64_t v, size_t n)
{
    for (; n >= 32; n -= 32)
    {
        uint64_t *chunk = (uint64_t *)(d + n - 32);

        chunk[0] = v;
        chunk[1] = v;
        chunk[2] = v;
        chunk[3] = v;
    }
}
/********************************************************************* /*********************************************************************
* memset (MSVCRT.@) * memset (MSVCRT.@)
*/ */
void* __cdecl memset(void *dst, int c, size_t n) void *__cdecl memset(void *dst, int c, size_t n)
{ {
volatile unsigned char *d = dst; /* avoid gcc optimizations */ uint64_t v = 0x101010101010101ull * (unsigned char)c;
while (n--) *d++ = c; unsigned char *d = (unsigned char *)dst;
size_t a = 0x20 - ((uintptr_t)d & 0x1f);
if (n >= 16)
{
*(uint64_t *)(d + 0) = v;
*(uint64_t *)(d + 8) = v;
*(uint64_t *)(d + n - 16) = v;
*(uint64_t *)(d + n - 8) = v;
if (n <= 32) return dst;
*(uint64_t *)(d + 16) = v;
*(uint64_t *)(d + 24) = v;
*(uint64_t *)(d + n - 32) = v;
*(uint64_t *)(d + n - 24) = v;
if (n <= 64) return dst;
n = (n - a) & ~0x1f;
memset_aligned_32(d + a, v, n);
return dst;
}
if (n >= 8)
{
*(uint64_t *)d = v;
*(uint64_t *)(d + n - 8) = v;
return dst;
}
if (n >= 4)
{
*(uint32_t *)d = v;
*(uint32_t *)(d + n - 4) = v;
return dst;
}
if (n >= 2)
{
*(uint16_t *)d = v;
*(uint16_t *)(d + n - 2) = v;
return dst;
}
if (n >= 1)
{
*(uint8_t *)d = v;
return dst;
}
return dst; return dst;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment