tHash.h source code [Modules/Foundation/Inc/Foundation/tHash.h]

1	// tHash.h
2	//
3	// Hash functions for various kinds of data. Use the 64 or 256 bit versions if you want to avoid collisions. There are two
4	// 32 bit hash functions. A fast version used for most string hashes, and a slower but better version. All functions
5	// return the supplied initialization vector(iv) if there was no data to hash. To compute a single hash from multiple
6	// data sources like strings, binary data, or files, you do NOT need to consolidate all the source data into one buffer
7	// first. Just set the initialization vector to the hash computed from the previous step.
8	//
9	// Copyright (c) 2004-2006, 2015, 2017, 2019 Tristan Grimmer.
10	// Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby
11	// granted, provided that the above copyright notice and this permission notice appear in all copies.
12	//
13	// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
14	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
15	// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
16	// AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17	// PERFORMANCE OF THIS SOFTWARE.
18
19	#pragma once
20	#include <Foundation/tStandard.h>
21	#include <Foundation/tString.h>
22	#include <Foundation/tFixInt.h>
23	namespace tHash
24	{
25
26
// Identifies the hash algorithms provided by this module.
enum class tHashAlgorithm
{
	Fast32,
	Jenkins32,
	Jenkins64,
	MD5,				// Produces a 128 bit result.
	Jenkins256
};
35
36
37	// These initialization vectors should not be modified unless you want to break a lot of code. The zero 32bit one is
38	// responsible for things like stringhash returning zero on empty strings.
39	const uint32 HashIV32 = `0`;
40	const uint64 HashIV64 = `0`;
41	const tuint128 HashIV128 = `0`;
42	const tuint256 HashIV256 = `0`;
43
44	// Normally the initial iv should not be modified for these fast hash functions. The fast hash functions are the only
45	// ones that store the entire state in the hash. Allows you to concatenate hashes of separate strings/data-sequences
46	// together by passing the hash of the previous call into the function again. This way you don't need to create a
47	// concatenated string/data-set to get its hash, you simply chain multiple calls together. The fast hash functions
48	// are the only ones that guarantee the same hash value whether computed in parts or as a single data-set.
49	uint32 tHashDataFast32(const uint8* data, int length, uint32 iv = HashIV32);
50	uint32 tHashStringFast32(const char*, uint32 iv = HashIV32);
51	uint32 tHashStringFast32(const tString&, uint32 iv = HashIV32);
52	uint32 tHashString(const char*);
53
54	// The CT (Compile Time) variant uses the fast-hash algorithm. It is super handy for use in the 'case' part of switch
55	// statements or any time you know the string literal explicitly. In these cases the compiler can do all the work.
56	constexpr uint32 tHashCT(const char*, uint32 iv = HashIV32);
57
58	// The HashData32/64/128/256 and variants do _not_ guarantee the same hash value if they are chained together compared
59	// to the hash of the same data computed as a single block. This is because the entire state is not stored in the hash
60	// itself since these are much better hash functions than the Fast32 versions. Chaining is still useful as uniqueness is
61	// still guaranteed and if any data changes in any of the sources the end result will vary. Chaining is performed in the
62	// same manner as HashDataFast32.
63	uint32 tHashData32(const uint8* data, int length, uint32 iv = HashIV32);
64	uint32 tHashString32(const char*, uint32 iv = HashIV32);
65	uint32 tHashString32(const tString&, uint32 iv = HashIV32);
66
67	uint64 tHashData64(const uint8* data, int length, uint64 iv = HashIV64);
68	uint64 tHashString64(const char*, uint64 iv = HashIV64);
69	uint64 tHashString64(const tString&, uint64 iv = HashIV64);
70
71	// The MD5 functions are used by the HashData128 functions. For reference and testing:
72	// MD5("The quick brown fox jumps over the lazy dog") = 9e107d9d372bb6826bd81d3542a419d6
73	// MD5("The quick brown fox jumps over the lazy dog.") = e4d909c290d0fb1ca068ffaddf22cbd0
74	tuint128 tHashDataMD5(const uint8* data, int length, tuint128 iv = HashIV128);
75	tuint128 tHashStringMD5(const char*, tuint128 iv = HashIV128);
76	tuint128 tHashStringMD5(const tString&, tuint128 iv = HashIV128);
77
78	tuint128 tHashData128(const uint8* data, int length, tuint128 iv = HashIV128);
79	tuint128 tHashString128(const char*, tuint128 iv = HashIV128);
80	tuint128 tHashString128(const tString&, tuint128 iv = HashIV128);
81
82	tuint256 tHashData256(const uint8* data, int length, tuint256 iv = HashIV256);
83	tuint256 tHashString256(const char, const* tuint256& iv = HashIV256);
84	tuint256 tHashString256(const tString&, const tuint256& iv = HashIV256);
85
86
87	// Implementation below this line.
88
89
90	inline uint32 tHashStringFast32(const char* string, uint32 iv)
91	{
92	if (!string)
93	return `0`;
94	return tHashDataFast32((uint8*)string, tStd::tStrlen(string), iv);
95	}
96
97
98	// This (compile-time) constant expression relies on the odometer-style looping of unsigned ints to compute the hash.
99	// Since it's inline, you may need to pragma warning(disable:4307), which warns of const integral overflow.
100	inline constexpr uint32 tHashCT(const char* s, uint32 hash) { return s ? tHashCT(s + `1`, hash + (hash << `5`) + uint8(s)) : hash; }
101	inline uint32 tHashStringFast32(const tString& s, uint32 iv) { return tHashStringFast32(s.ConstText(), iv); }
102	inline uint32 tHashString(const char* s) { return tHashStringFast32(s); }
103	inline uint32 tHashString32(const char* string, uint32 iv) { return tHashData32((uint8*)string, tStd::tStrlen(string), iv); }
104	inline uint32 tHashString32(const tString& s, uint32 iv) { return tHashString32(s.ConstText(), iv); }
105	inline uint64 tHashString64(const char* string, uint64 iv) { return tHashData64((uint8*)string, tStd::tStrlen(string), iv); }
106	inline uint64 tHashString64(const tString& s, uint64 iv) { return tHashString64(s.ConstText(), iv); }
107	inline tuint128 tHashStringMD5(const char* string, tuint128 iv) { return tHashDataMD5((uint8*)string, tStd::tStrlen(string), iv); }
108	inline tuint128 tHashStringMD5(const tString& s, tuint128 iv) { return tHashStringMD5(s.ConstText(), iv); }
109	inline tuint128 tHashData128(const uint8* data, int length, tuint128 iv) { return tHashDataMD5(data, length, iv); }
110	inline tuint128 tHashString128(const char* string, tuint128 iv) { return tHashDataMD5((uint8*)string, tStd::tStrlen(string), iv); }
111	inline tuint128 tHashString128(const tString& s, tuint128 iv) { return tHashStringMD5(s.ConstText(), iv); }
112	inline tuint256 tHashString256(const char* string, const tuint256& iv) { return tHashData256((uint8*)string, tStd::tStrlen(string), iv); }
113	inline tuint256 tHashString256(const tString& s, const tuint256& iv) { return tHashString256(s.ConstText(), iv); }
114
115
116	}
117

Source File Modules/Foundation/Inc/Foundation/tHash.hHome

Source File Modules/Foundation/Inc/Foundation/tHash.h
Home