1// tHash.h 
2// 
// Hash functions for various kinds of data. Use the 64 or 256 bit versions if you want to avoid collisions. There are
// two 32 bit hash functions: a fast version used for most string hashes, and a slower but better version. All
// functions return the supplied initialization vector (iv) if there was no data to hash. To compute a single hash from
// multiple data sources like strings, binary data, or files, you do NOT need to consolidate all the source data into
// one buffer first. Just set the initialization vector to the hash computed from the previous step.
8// 
9// Copyright (c) 2004-2006, 2015, 2017, 2019 Tristan Grimmer. 
10// Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby 
11// granted, provided that the above copyright notice and this permission notice appear in all copies. 
12// 
13// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL 
14// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 
15// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 
16// AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 
17// PERFORMANCE OF THIS SOFTWARE. 
18 
19#pragma once 
20#include <Foundation/tStandard.h> 
21#include <Foundation/tString.h> 
22#include <Foundation/tFixInt.h> 
23namespace tHash 
24
25 
26 
27enum class tHashAlgorithm 
28
29 Fast32
30 Jenkins32
31 Jenkins64
32 MD5, // MD5 is 128 bit. 
33 Jenkins256 
34}; 
35 
36 
37// These initialization vectors should not be modified unless you want to break a lot of code. The zero 32bit one is 
38// responsible for things like stringhash returning zero on empty strings. 
39const uint32 HashIV32 = 0
40const uint64 HashIV64 = 0
41const tuint128 HashIV128 = 0
42const tuint256 HashIV256 = 0
43 
44// Normally the initial iv should not be modified for these fast hash functions. The fast hash functions are the only 
45// ones that store the entire state in the hash. Allows you to concatenate hashes of separate strings/data-sequences 
46// together by passing the hash of the previous call into the function again. This way you don't need to create a 
47// concatenated string/data-set to get its hash, you simply chain multiple calls together. The fast hash functions 
48// are the only ones that guarantee the same hash value whether computed in parts or as a single data-set. 
49uint32 tHashDataFast32(const uint8* data, int length, uint32 iv = HashIV32); 
50uint32 tHashStringFast32(const char*, uint32 iv = HashIV32); 
51uint32 tHashStringFast32(const tString&, uint32 iv = HashIV32); 
52uint32 tHashString(const char*); 
53 
54// The CT (Compile Time) variant uses the fast-hash algorithm. It is super handy for use in the 'case' part of switch 
55// statements or any time you know the string literal explicitly. In these cases the compiler can do all the work. 
56constexpr uint32 tHashCT(const char*, uint32 iv = HashIV32); 
57 
58// The HashData32/64/128/256 and variants do _not_ guarantee the same hash value if they are chained together compared 
59// to the hash of the same data computed as a single block. This is because the entire state is not stored in the hash 
60// itself since these are much better hash functions than the Fast32 versions. Chaining is still useful as uniqueness is 
61// still guaranteed and if any data changes in any of the sources the end result will vary. Chaining is performed in the 
62// same manner as HashDataFast32. 
63uint32 tHashData32(const uint8* data, int length, uint32 iv = HashIV32); 
64uint32 tHashString32(const char*, uint32 iv = HashIV32); 
65uint32 tHashString32(const tString&, uint32 iv = HashIV32); 
66 
67uint64 tHashData64(const uint8* data, int length, uint64 iv = HashIV64); 
68uint64 tHashString64(const char*, uint64 iv = HashIV64); 
69uint64 tHashString64(const tString&, uint64 iv = HashIV64); 
70 
71// The MD5 functions are used by the HashData128 functions. For reference and testing: 
72// MD5("The quick brown fox jumps over the lazy dog") = 9e107d9d372bb6826bd81d3542a419d6 
73// MD5("The quick brown fox jumps over the lazy dog.") = e4d909c290d0fb1ca068ffaddf22cbd0 
74tuint128 tHashDataMD5(const uint8* data, int length, tuint128 iv = HashIV128); 
75tuint128 tHashStringMD5(const char*, tuint128 iv = HashIV128); 
76tuint128 tHashStringMD5(const tString&, tuint128 iv = HashIV128); 
77 
78tuint128 tHashData128(const uint8* data, int length, tuint128 iv = HashIV128); 
79tuint128 tHashString128(const char*, tuint128 iv = HashIV128); 
80tuint128 tHashString128(const tString&, tuint128 iv = HashIV128); 
81 
82tuint256 tHashData256(const uint8* data, int length, tuint256 iv = HashIV256); 
83tuint256 tHashString256(const char*, const tuint256& iv = HashIV256); 
84tuint256 tHashString256(const tString&, const tuint256& iv = HashIV256); 
85 
86 
87// Implementation below this line. 
88 
89 
90inline uint32 tHashStringFast32(const char* string, uint32 iv
91
92 if (!string
93 return 0
94 return tHashDataFast32((uint8*)string, tStd::tStrlen(string), iv); 
95
96 
97 
98// This (compile-time) constant expression relies on the odometer-style looping of unsigned ints to compute the hash. 
99// Since it's inline, you may need to pragma warning(disable:4307), which warns of const integral overflow. 
100inline constexpr uint32 tHashCT(const char* s, uint32 hash) { return *s ? tHashCT(s + 1, hash + (hash << 5) + uint8(*s)) : hash; } 
101inline uint32 tHashStringFast32(const tString& s, uint32 iv) { return tHashStringFast32(s.ConstText(), iv); } 
102inline uint32 tHashString(const char* s) { return tHashStringFast32(s); } 
103inline uint32 tHashString32(const char* string, uint32 iv) { return tHashData32((uint8*)string, tStd::tStrlen(string), iv); } 
104inline uint32 tHashString32(const tString& s, uint32 iv) { return tHashString32(s.ConstText(), iv); } 
105inline uint64 tHashString64(const char* string, uint64 iv) { return tHashData64((uint8*)string, tStd::tStrlen(string), iv); } 
106inline uint64 tHashString64(const tString& s, uint64 iv) { return tHashString64(s.ConstText(), iv); } 
107inline tuint128 tHashStringMD5(const char* string, tuint128 iv) { return tHashDataMD5((uint8*)string, tStd::tStrlen(string), iv); } 
108inline tuint128 tHashStringMD5(const tString& s, tuint128 iv) { return tHashStringMD5(s.ConstText(), iv); } 
109inline tuint128 tHashData128(const uint8* data, int length, tuint128 iv) { return tHashDataMD5(data, length, iv); } 
110inline tuint128 tHashString128(const char* string, tuint128 iv) { return tHashDataMD5((uint8*)string, tStd::tStrlen(string), iv); } 
111inline tuint128 tHashString128(const tString& s, tuint128 iv) { return tHashStringMD5(s.ConstText(), iv); } 
112inline tuint256 tHashString256(const char* string, const tuint256& iv) { return tHashData256((uint8*)string, tStd::tStrlen(string), iv); } 
113inline tuint256 tHashString256(const tString& s, const tuint256& iv) { return tHashString256(s.ConstText(), iv); } 
114 
115 
116
117