Files
UnrealEngine/Engine/Source/Programs/Shared/EpicGames.Core/CityHash.cs
2025-05-18 13:04:45 +08:00

473 lines
13 KiB
C#

#pragma warning disable IDE0073
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// This file provides CityHash64() and related functions.
//
// It's probably possible to create even faster hash functions by
// writing a program that systematically explores some of the space of
// possible hash functions, by using SIMD instructions, or by
// compromising on hash quality.
using System;
namespace EpicGames.Core
{
#pragma warning disable IDE0054
#pragma warning disable IDE0180
#pragma warning disable IDE1006
#pragma warning disable CS1591 // Missing documentation
#pragma warning disable CA1034 // Nested types should not be visible
#pragma warning disable CA1707 // Identifiers should not contain underscores
#pragma warning disable CA1051 // Do not declare visible instance fields
#pragma warning disable CA1822 // Mark members as static
public class CityHash
{
// Some primes between 2^63 and 2^64 for various uses.
const ulong k0 = 0xc3a5c85c97cb3127;
const ulong k1 = 0xb492b66fbe98f273;
const ulong k2 = 0x9ae16a3b2f90404f;
// Magic numbers for 32-bit hashing. Copied from Murmur3.
const uint c1 = 0xcc9e2d51;
const uint c2 = 0x1b873593;
public struct Uint128_64
{
public ulong lo;
public ulong hi;
}
private static uint bswap_32(uint x)
{
return (x >> 24) + ((x >> 8) & 0xff00) + ((x << 8) & 0xff0000) + (x << 24);
}
private static ulong bswap_64(ulong x)
{
return (x >> 56) + ((x >> 40) & 0xff00) + ((x >> 24) & 0xff0000) + ((x >> 8) & 0xff000000) + ((x << 8) & 0xff00000000L) + ((x << 24) & 0xff0000000000L) + ((x << 40) & 0xff000000000000L) + (x << 56);
}
private static unsafe uint Fetch32(byte* data)
{
#if BIGENDIAN
return (uint)data[3] + ((uint)data[2] << 8) + ((uint)data[1] << 16) + ((uint)data[0] << 24);
#else
return (uint)data[0] + ((uint)data[1] << 8) + ((uint)data[2] << 16) + ((uint)data[3] << 24);
#endif
}
private static unsafe ulong Fetch64(byte* data)
{
#if BIGENDIAN
return ((ulong)Fetch32(data) << 32) + (ulong)Fetch32(data + 4);
#else
return (ulong)Fetch32(data) + ((ulong)Fetch32(data + 4) << 32);
#endif
}
private static uint fmix(uint h)
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
private static uint Rotate32(uint val, int shift)
{
// Avoid shifting by 32: doing so yields an undefined result.
return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
}
private static void SwapValues<T>(ref T a, ref T b)
{
T c = a;
a = b;
b = c;
}
private static void PERMUTE3<T>(ref T a, ref T b, ref T c)
{
//SwapValues(ref a, ref b);
//SwapValues(ref a, ref c);
T d = c;
c = b;
b = a;
a = d;
}
private static uint Mur(uint a, uint h)
{
// Helper from Murmur3 for combining two 32-bit values.
a *= c1;
a = Rotate32(a, 17);
a *= c2;
h ^= a;
h = Rotate32(h, 19);
return h * 5 + 0xe6546b64;
}
private static unsafe uint Hash32Len13to24(byte* s, uint len)
{
uint a = Fetch32(s - 4 + (len >> 1));
uint b = Fetch32(s + 4);
uint c = Fetch32(s + len - 8);
uint d = Fetch32(s + (len >> 1));
uint e = Fetch32(s);
uint f = Fetch32(s + len - 4);
uint h = len;
return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
}
private static unsafe uint Hash32Len0to4(byte* s, uint len)
{
uint b = 0;
uint c = 9;
for (uint i = 0; i < len; i++)
{
char v = (char)s[i];
b = b * c1 + v;
c ^= b;
}
return fmix(Mur(b, Mur(len, c)));
}
private static unsafe uint Hash32Len5to12(byte* s, uint len)
{
uint a = len, b = len * 5, c = 9, d = b;
a += Fetch32(s);
b += Fetch32(s + len - 4);
c += Fetch32(s + ((len >> 1) & 4));
return fmix(Mur(c, Mur(b, Mur(a, d))));
}
public static unsafe uint CityHash32(byte* s, uint len)
{
if (len <= 24)
{
return len <= 12 ?
(len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
Hash32Len13to24(s, len);
}
// len > 24
uint h = len, g = c1 * len, f = g;
uint a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
uint a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
uint a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
uint a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
uint a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
h ^= a0;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
h ^= a2;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= a1;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
g ^= a3;
g = Rotate32(g, 19);
g = g * 5 + 0xe6546b64;
f += a4;
f = Rotate32(f, 19);
f = f * 5 + 0xe6546b64;
uint iters = (len - 1) / 20;
do
{
uint _a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
uint _a1 = Fetch32(s + 4);
uint _a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
uint _a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
uint _a4 = Fetch32(s + 16);
h ^= _a0;
h = Rotate32(h, 18);
h = h * 5 + 0xe6546b64;
f += _a1;
f = Rotate32(f, 19);
f = f * c1;
g += _a2;
g = Rotate32(g, 18);
g = g * 5 + 0xe6546b64;
h ^= _a3 + _a1;
h = Rotate32(h, 19);
h = h * 5 + 0xe6546b64;
g ^= _a4;
g = bswap_32(g) * 5;
h += _a4 * 5;
h = bswap_32(h);
f += _a0;
PERMUTE3(ref f, ref h, ref g);
s += 20;
} while (--iters != 0);
g = Rotate32(g, 11) * c1;
g = Rotate32(g, 17) * c1;
f = Rotate32(f, 11) * c1;
f = Rotate32(f, 17) * c1;
h = Rotate32(h + g, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
h = Rotate32(h + f, 19);
h = h * 5 + 0xe6546b64;
h = Rotate32(h, 17) * c1;
return h;
}
public static uint CityHash32(ReadOnlySpan<byte> Span)
{
unsafe
{
fixed (byte* s = Span)
{
return CityHash32(s, (uint)Span.Length);
}
}
}
// Bitwise right rotate. Normally this will compile to a single
// instruction, especially if the shift is a manifest constant.
private static ulong Rotate(ulong val, int shift)
{
// Avoid shifting by 64: doing so yields an undefined result.
return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
}
private static ulong ShiftMix(ulong val)
{
return val ^ (val >> 47);
}
private static ulong HashLen16(ulong u, ulong v)
{
return CityHash128to64(new Uint128_64 { lo = u, hi = v });
}
private static ulong HashLen16(ulong u, ulong v, ulong mul)
{
// Murmur-inspired hashing.
ulong a = (u ^ v) * mul;
a ^= (a >> 47);
ulong b = (v ^ a) * mul;
b ^= (b >> 47);
b *= mul;
return b;
}
private static unsafe ulong HashLen0to16(byte* s, uint len)
{
if (len >= 8)
{
ulong mul = k2 + len * 2;
ulong a = Fetch64(s) + k2;
ulong b = Fetch64(s + len - 8);
ulong c = Rotate(b, 37) * mul + a;
ulong d = (Rotate(a, 25) + b) * mul;
return HashLen16(c, d, mul);
}
if (len >= 4)
{
ulong mul = k2 + len * 2;
ulong a = Fetch32(s);
return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul);
}
if (len > 0)
{
byte a = s[0];
byte b = s[len >> 1];
byte c = s[len - 1];
uint y = (uint)a + ((uint)b << 8);
uint z = len + ((uint)c << 2);
return ShiftMix(y * k2 ^ z * k0) * k2;
}
return k2;
}
// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
private static unsafe ulong HashLen17to32(byte* s, uint len)
{
ulong mul = k2 + len * 2;
ulong a = Fetch64(s) * k1;
ulong b = Fetch64(s + 8);
ulong c = Fetch64(s + len - 8) * mul;
ulong d = Fetch64(s + len - 16) * k2;
return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d,
a + Rotate(b + k2, 18) + c, mul);
}
// Return a 16-byte hash for 48 bytes. Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
private static unsafe Uint128_64 WeakHashLen32WithSeeds(
ulong w, ulong x, ulong y, ulong z, ulong a, ulong b)
{
a += w;
b = Rotate(b + a + z, 21);
ulong c = a;
a += x;
a += y;
b += Rotate(a, 44);
return new Uint128_64 { lo = (a + z), hi = (b + c) };
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
private static unsafe Uint128_64 WeakHashLen32WithSeeds(byte* s, ulong a, ulong b)
{
return WeakHashLen32WithSeeds(Fetch64(s),
Fetch64(s + 8),
Fetch64(s + 16),
Fetch64(s + 24),
a,
b);
}
// Return an 8-byte hash for 33 to 64 bytes.
private static unsafe ulong HashLen33to64(byte* s, uint len)
{
ulong mul = k2 + len * 2;
ulong a = Fetch64(s) * k2;
ulong b = Fetch64(s + 8);
ulong c = Fetch64(s + len - 24);
ulong d = Fetch64(s + len - 32);
ulong e = Fetch64(s + 16) * k2;
ulong f = Fetch64(s + 24) * 9;
ulong g = Fetch64(s + len - 8);
ulong h = Fetch64(s + len - 16) * mul;
ulong u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
ulong v = ((a + g) ^ d) + f + 1;
ulong w = bswap_64((u + v) * mul) + h;
ulong x = Rotate(e + f, 42) + c;
ulong y = (bswap_64((v + w) * mul) + g) * mul;
ulong z = e + f + c;
a = bswap_64((x + z) * mul + y) + b;
b = ShiftMix((z + a) * mul + d + h) * mul;
return b + x;
}
public static unsafe ulong CityHash64(byte* s, uint len)
{
if (len <= 32)
{
if (len <= 16)
{
return HashLen0to16(s, len);
}
else
{
return HashLen17to32(s, len);
}
}
else if (len <= 64)
{
return HashLen33to64(s, len);
}
// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y, and z.
ulong x = Fetch64(s + len - 40);
ulong y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
ulong z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
Uint128_64 v = WeakHashLen32WithSeeds(s + len - 64, len, z);
Uint128_64 w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
x = x * k1 + Fetch64(s);
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
len = (len - 1) & ~(uint)63;
do
{
x = Rotate(x + y + v.lo + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.hi + Fetch64(s + 48), 42) * k1;
x ^= w.hi;
y += v.lo + Fetch64(s + 40);
z = Rotate(z + w.lo, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.hi * k1, x + w.lo);
w = WeakHashLen32WithSeeds(s + 32, z + w.hi, y + Fetch64(s + 16));
SwapValues(ref z, ref x);
s += 64;
len -= 64;
} while (len != 0);
return HashLen16(HashLen16(v.lo, w.lo) + ShiftMix(y) * k1 + z,
HashLen16(v.hi, w.hi) + x);
}
public static ulong CityHash64(ReadOnlySpan<byte> Span)
{
unsafe
{
fixed (byte* s = Span)
{
return CityHash64(s, (uint)Span.Length);
}
}
}
public static unsafe ulong CityHash64WithSeed(byte* s, uint len, ulong seed)
{
return CityHash64WithSeeds(s, len, k2, seed);
}
public ulong CityHash64WithSeed(ReadOnlySpan<byte> span, ulong seed)
{
return CityHash64WithSeeds(span, k2, seed);
}
public static unsafe ulong CityHash64WithSeeds(byte* s, uint len, ulong seed0, ulong seed1)
{
return HashLen16(CityHash64(s, len) - seed0, seed1);
}
public static ulong CityHash64WithSeeds(ReadOnlySpan<byte> Span, ulong seed0, ulong seed1)
{
unsafe
{
fixed (byte* s = Span)
{
return CityHash64WithSeeds(s, (uint)Span.Length, seed0, seed1);
}
}
}
// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
public static ulong CityHash128to64(Uint128_64 x)
{
// Murmur-inspired hashing.
const ulong kMul = 0x9ddfea08eb382d69L;
ulong a = (x.lo ^ x.hi) * kMul;
a ^= (a >> 47);
ulong b = (x.hi ^ a) * kMul;
b ^= (b >> 47);
b *= kMul;
return b;
}
}
}
#pragma warning restore IDE1006
#pragma warning restore IDE0180
#pragma warning restore IDE0054