1 | // tHash.h  |
2 | //  |
// Hash functions for various kinds of data. Use the 64 or 256 bit versions if you want to avoid collisions. There are
// two 32 bit hash functions: a fast version used for most string hashes, and a slower but better version. All functions
// return the supplied initialization vector (iv) if there was no data to hash. To compute a single hash from multiple
// data sources like strings, binary data, or files, you do NOT need to consolidate all the source data into one buffer
// first. Just set the initialization vector to the hash computed from the previous step.
8 | //  |
9 | // Copyright (c) 2004-2006, 2015, 2017, 2019 Tristan Grimmer.  |
10 | // Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby  |
11 | // granted, provided that the above copyright notice and this permission notice appear in all copies.  |
12 | //  |
13 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL  |
14 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,  |
15 | // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  |
16 | // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR  |
17 | // PERFORMANCE OF THIS SOFTWARE.  |
18 |   |
19 | #pragma once  |
20 | #include <Foundation/tStandard.h>  |
21 | #include <Foundation/tString.h>  |
22 | #include <Foundation/tFixInt.h>  |
23 | namespace tHash  |
24 | {  |
25 |   |
26 |   |
// Enumerates the hash algorithms offered by this header.
// NOTE(review): the Jenkins* entries presumably correspond to the tHashData32/64/256 families -- confirm against the
// implementation file.
enum class tHashAlgorithm
{
	Fast32,
	Jenkins32,
	Jenkins64,
	MD5,				// MD5 is 128 bit.
	Jenkins256
};
35 |   |
36 |   |
37 | // These initialization vectors should not be modified unless you want to break a lot of code. The zero 32bit one is  |
38 | // responsible for things like stringhash returning zero on empty strings.  |
39 | const uint32 HashIV32 = 0;  |
40 | const uint64 HashIV64 = 0;  |
41 | const tuint128 HashIV128 = 0;  |
42 | const tuint256 HashIV256 = 0;  |
43 |   |
44 | // Normally the initial iv should not be modified for these fast hash functions. The fast hash functions are the only  |
45 | // ones that store the entire state in the hash. Allows you to concatenate hashes of separate strings/data-sequences  |
46 | // together by passing the hash of the previous call into the function again. This way you don't need to create a  |
47 | // concatenated string/data-set to get its hash, you simply chain multiple calls together. The fast hash functions  |
48 | // are the only ones that guarantee the same hash value whether computed in parts or as a single data-set.  |
49 | uint32 tHashDataFast32(const uint8* data, int length, uint32 iv = HashIV32);  |
50 | uint32 tHashStringFast32(const char*, uint32 iv = HashIV32);  |
51 | uint32 tHashStringFast32(const tString&, uint32 iv = HashIV32);  |
52 | uint32 tHashString(const char*);  |
53 |   |
54 | // The CT (Compile Time) variant uses the fast-hash algorithm. It is super handy for use in the 'case' part of switch  |
55 | // statements or any time you know the string literal explicitly. In these cases the compiler can do all the work.  |
56 | constexpr uint32 tHashCT(const char*, uint32 iv = HashIV32);  |
57 |   |
58 | // The HashData32/64/128/256 and variants do _not_ guarantee the same hash value if they are chained together compared  |
59 | // to the hash of the same data computed as a single block. This is because the entire state is not stored in the hash  |
60 | // itself since these are much better hash functions than the Fast32 versions. Chaining is still useful as uniqueness is  |
61 | // still guaranteed and if any data changes in any of the sources the end result will vary. Chaining is performed in the  |
62 | // same manner as HashDataFast32.  |
63 | uint32 tHashData32(const uint8* data, int length, uint32 iv = HashIV32);  |
64 | uint32 tHashString32(const char*, uint32 iv = HashIV32);  |
65 | uint32 tHashString32(const tString&, uint32 iv = HashIV32);  |
66 |   |
67 | uint64 tHashData64(const uint8* data, int length, uint64 iv = HashIV64);  |
68 | uint64 tHashString64(const char*, uint64 iv = HashIV64);  |
69 | uint64 tHashString64(const tString&, uint64 iv = HashIV64);  |
70 |   |
71 | // The MD5 functions are used by the HashData128 functions. For reference and testing:  |
72 | // MD5("The quick brown fox jumps over the lazy dog") = 9e107d9d372bb6826bd81d3542a419d6  |
73 | // MD5("The quick brown fox jumps over the lazy dog.") = e4d909c290d0fb1ca068ffaddf22cbd0  |
74 | tuint128 tHashDataMD5(const uint8* data, int length, tuint128 iv = HashIV128);  |
75 | tuint128 tHashStringMD5(const char*, tuint128 iv = HashIV128);  |
76 | tuint128 tHashStringMD5(const tString&, tuint128 iv = HashIV128);  |
77 |   |
78 | tuint128 tHashData128(const uint8* data, int length, tuint128 iv = HashIV128);  |
79 | tuint128 tHashString128(const char*, tuint128 iv = HashIV128);  |
80 | tuint128 tHashString128(const tString&, tuint128 iv = HashIV128);  |
81 |   |
82 | tuint256 tHashData256(const uint8* data, int length, tuint256 iv = HashIV256);  |
83 | tuint256 tHashString256(const char*, const tuint256& iv = HashIV256);  |
84 | tuint256 tHashString256(const tString&, const tuint256& iv = HashIV256);  |
85 |   |
86 |   |
87 | // Implementation below this line.  |
88 |   |
89 |   |
90 | inline uint32 tHashStringFast32(const char* string, uint32 iv)  |
91 | {  |
92 | if (!string)  |
93 | return 0;  |
94 | return tHashDataFast32((uint8*)string, tStd::tStrlen(string), iv);  |
95 | }  |
96 |   |
97 |   |
98 | // This (compile-time) constant expression relies on the odometer-style looping of unsigned ints to compute the hash.  |
99 | // Since it's inline, you may need to pragma warning(disable:4307), which warns of const integral overflow.  |
100 | inline constexpr uint32 tHashCT(const char* s, uint32 hash) { return *s ? tHashCT(s + 1, hash + (hash << 5) + uint8(*s)) : hash; }  |
101 | inline uint32 tHashStringFast32(const tString& s, uint32 iv) { return tHashStringFast32(s.ConstText(), iv); }  |
102 | inline uint32 tHashString(const char* s) { return tHashStringFast32(s); }  |
103 | inline uint32 tHashString32(const char* string, uint32 iv) { return tHashData32((uint8*)string, tStd::tStrlen(string), iv); }  |
104 | inline uint32 tHashString32(const tString& s, uint32 iv) { return tHashString32(s.ConstText(), iv); }  |
105 | inline uint64 tHashString64(const char* string, uint64 iv) { return tHashData64((uint8*)string, tStd::tStrlen(string), iv); }  |
106 | inline uint64 tHashString64(const tString& s, uint64 iv) { return tHashString64(s.ConstText(), iv); }  |
107 | inline tuint128 tHashStringMD5(const char* string, tuint128 iv) { return tHashDataMD5((uint8*)string, tStd::tStrlen(string), iv); }  |
108 | inline tuint128 tHashStringMD5(const tString& s, tuint128 iv) { return tHashStringMD5(s.ConstText(), iv); }  |
109 | inline tuint128 tHashData128(const uint8* data, int length, tuint128 iv) { return tHashDataMD5(data, length, iv); }  |
110 | inline tuint128 tHashString128(const char* string, tuint128 iv) { return tHashDataMD5((uint8*)string, tStd::tStrlen(string), iv); }  |
111 | inline tuint128 tHashString128(const tString& s, tuint128 iv) { return tHashStringMD5(s.ConstText(), iv); }  |
112 | inline tuint256 tHashString256(const char* string, const tuint256& iv) { return tHashData256((uint8*)string, tStd::tStrlen(string), iv); }  |
113 | inline tuint256 tHashString256(const tString& s, const tuint256& iv) { return tHashString256(s.ConstText(), iv); }  |
114 |   |
115 |   |
116 | }  |
117 | |