1 | // tString.h  |
2 | //  |
3 | // tString is a simple and readable string class that implements sensible operators, including implicit casts. There is  |
4 | // no UCS2 or UTF16 support since UTF8 is, in my opinion, superior and the way forward. tStrings will work with UTF8.  |
5 | // You cannot stream (from cin etc) more than 512 chars into a string. This restriction is only for wacky << streaming.  |
6 | // For conversions of arbitrary types to tStrings, see tsPrint in the higher level System module.  |
7 | //  |
8 | // Copyright (c) 2004-2006, 2015, 2017, 2019, 2020 Tristan Grimmer.  |
9 | // Copyright (c) 2020 Stefan Wessels.  |
10 | // Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby  |
11 | // granted, provided that the above copyright notice and this permission notice appear in all copies.  |
12 | //  |
13 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL  |
14 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,  |
15 | // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  |
16 | // AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR  |
17 | // PERFORMANCE OF THIS SOFTWARE.  |
18 |   |
19 | #pragma once  |
20 | #include "Foundation/tStandard.h"  |
21 | #include "Foundation/tList.h"  |
22 |   |
23 |   |
24 | struct tString  |
25 | {  |
26 | tString() { TextData = &EmptyChar; }  |
27 | tString(const tString&);  |
28 |   |
29 | // Construct a string with enough room for length characters. Length+1 characters are reserved to make room for the  |
30 | // null terminator. The reserved space is zeroed.  |
31 | explicit tString(int length);  |
32 | tString(const char*);  |
33 | tString(char);  |
34 | virtual ~tString();  |
35 |   |
36 | tString& operator=(const tString&);  |
37 |   |
38 | bool IsEqual(const tString& s) const { return( !tStd::tStrcmp(TextData, s.TextData) ); }  |
39 | bool IsEqual(const char* s) const { return( !tStd::tStrcmp(TextData, s) ); }  |
40 | bool IsEqualCI(const tString& s) const { return( !tStd::tStricmp(TextData, s.TextData) ); }  |
41 | bool IsEqualCI(const char* s) const { return( !tStd::tStricmp(TextData, s) ); }  |
42 |   |
43 | // These allow for implicit conversion to a character pointer.  |
44 | operator const char*() { return TextData; }  |
45 | operator const char*() const { return TextData; }  |
46 |   |
47 | explicit operator uint32();  |
48 | explicit operator uint32() const;  |
49 |   |
50 | char& operator[](int i) { return TextData[i]; }  |
51 | friend tString operator+(const tString& prefix, const tString& suffix);  |
52 | tString& operator+=(const tString&);  |
53 |   |
54 | void Set(const char*);  |
55 | int Length() const { return int(tStd::tStrlen(TextData)); }  |
56 | bool IsEmpty() const { return (TextData == &EmptyChar) || !tStd::tStrlen(TextData); }  |
57 | void Clear() { if (TextData != &EmptyChar) delete[] TextData; TextData = &EmptyChar; }  |
58 |   |
59 | bool IsAlphabetic(bool includeUnderscore = true) const;  |
60 | bool IsNumeric(bool includeDecimal = false) const;  |
61 | bool IsAlphaNumeric(bool includeUnderscore = true, bool includeDecimal = false) const;  |
62 |   |
63 | // Current string data is lost and enough space is reserved for length characters. The reserved memory is zeroed.  |
64 | void Reserve(int length);  |
65 |   |
66 | tString Left(const char marker = ' ') const; // Returns a tString of the characters before the first marker. Returns the entire string if marker was not found.  |
67 | tString Right(const char marker = ' ') const; // Same as Left but chars after last marker.  |
68 | tString Left(int count) const; // Returns a tString of the first count chars. Return what's available if count > length.  |
69 | tString Right(int count) const; // Same as Left but returns last count chars.  |
70 | tString Mid(int start, int count) const; // Returns count chars from start (inclusive), or what's available if start+count > length.  |
71 |   |
72 | // Extracts first word up to and not including first divider encountered. The tString is left with the remainder,  |
73 | // not including the divider. If divider isn't found, the entire string is returned and the tString is left empty.  |
74 | tString (const char divider = ' ');  |
75 |   |
76 | // Extracts word after last divider. The tString is left with the remainder, not including the divider. If the  |
77 | // divider isn't found, the entire string is returned and the tString is left empty.  |
78 | tString (const char divider = ' ');  |
79 |   |
80 | // Returns a tString of the first count chars. Removes these from the current string. If count > length then what's  |
81 | // available is extracted.  |
82 | tString (int count);  |
83 |   |
84 | // Returns a tString of the last count chars. Removes these from the current string. If count > length then what's  |
85 | // available is extracted.  |
86 | tString (int count);  |
87 |   |
88 | // Returns chars from start to count, but also removes that from the tString. If start + count > length then what's  |
89 | // available is extracted.  |
90 | tString (int start, int count);  |
91 |   |
92 | char* Text() { return TextData; }  |
93 | const char* ConstText() const { return TextData; }  |
94 |   |
95 | // Returns POD representation (Plain Old Data). For use with tPrintf and %s.  |
96 | const char* Pod() const { return TextData; }  |
97 |   |
98 | // One more synonym for ConstText. For use with tPrintf and %s.  |
99 | const char* Chars() const { return TextData; }  |
100 |   |
101 | // Returns index of first/last occurrence of char in the string. -1 if not found. Finds last if backwards flag is  |
102 | // set. The starting point may be specified. If backwards is false, the search proceeds forwards from the starting  |
103 | // point. If backwards is true, it proceeds backwards. If startIndex is -1, 0 is the starting point for a forward  |
104 | // search and length-1 is the starting point for a backwards search.  |
105 | int FindChar(const char, bool backwards = false, int startIndex = -1) const;  |
106 |   |
107 | // Returns the index of the first character in the tString that is also somewhere in the null-terminated string  |
108 | // searchChars. Returns -1 if none of them match.  |
109 | int FindAny(const char* searchChars) const;  |
110 |   |
111 | // Returns index of first character of the string s in the string. Returns -1 if not found.  |
112 | int FindString(const char* s, int startIndex = 0) const;  |
113 |   |
114 | // Replace all occurrences of character c with character r. Returns number of characters replaced.  |
115 | int Replace(const char c, const char r);  |
116 |   |
117 | // Replace all occurrences of string search with string replace. Returns the number of replacements. The replacement  |
118 | // is done in a forward direction. If replace is a different size than search, memory will be managed to accomadate  |
119 | // the larger or smaller resulting string and keep the memory footprint as small as possible. If they are the same  |
120 | // size, the function is faster and doesn't need to mess with memory. If replace is "" or 0, all occurrences of  |
121 | // search will be removed (replaced by nothing).  |
122 | int Replace(const char* search, const char* replace);  |
123 |   |
124 | // Remove all occurrences of the character rem. Returns the number of characters removed.  |
125 | int Remove(const char rem);  |
126 |   |
127 | // Removing a string simply calls Replace with a null second string. Returns how many rem strings were removed.  |
128 | int Remove(const char* rem) { return Replace(rem, nullptr); }  |
129 |   |
130 | int RemoveLeading(const char* removeThese); // removeThese is a null-terminated list of chars to remove.  |
131 | int RemoveTrailing(const char* removeThese); // removeThese is a null-terminated list of chars to remove.  |
132 | int CountChar(char c) const; // Counts the number of occurrences of c.  |
133 |   |
134 | void ToUpper() { tStd::tStrupr(TextData); }  |
135 | void ToLower() { tStd::tStrlwr(TextData); }  |
136 |   |
137 | // UpCase and LowCase both modify the object as well as return a reference to it. Returning a reference makes it  |
138 | // easy to string together expressions such as: if (name.LowCase() == "ah")  |
139 | tString& UpCase() { ToUpper(); return *this; }  |
140 | tString& LowCase() { ToLower(); return *this; }  |
141 |   |
142 | // See comment for tStrtoiT in tStandard.h for format requirements. The summary is that if base is -1, the function  |
143 | // looks one of the following prefixes in the string, defaulting to base 10 if none found.  |
144 | //  |
145 | // Base 16 prefixes: x X 0x 0X #  |
146 | // Base 10 prefixes: d D 0d 0D  |
147 | // Base 8 prefixes: o O 0o 0O @  |
148 | // Base 2 prefixes: b B 0b 0B  |
149 | int GetAsInt(int base = -1) const { return GetAsInt32(base); }  |
150 | int32 GetAsInt32(int base = -1) const { return tStd::tStrtoi32(TextData, base); }  |
151 | int64 GetAsInt64(int base = -1) const { return tStd::tStrtoi64(TextData, base); }  |
152 | uint GetAsUInt(int base = -1) const { return GetAsUInt32(base); }  |
153 | uint32 GetAsUInt32(int base = -1) const { return tStd::tStrtoui32(TextData, base); }  |
154 | uint64 GetAsUInt64(int base = -1) const { return tStd::tStrtoui64(TextData, base); }  |
155 |   |
156 | bool GetAsIntStrict(int& IntValue, int base = -1) const { return GetAsInt32Strict(IntValue, base); }  |
157 | bool GetAsInt32Strict(int32& Int32Value, int base = -1) const { return tStd::tStrtoi32Strict(TextData, Int32Value, base); }  |
158 | bool GetAsInt64Strict(int64& Int64Value, int base = -1) const { return tStd::tStrtoi64Strict(TextData, Int64Value, base); }  |
159 | bool GetAsUIntStrict(uint& UIntValue, int base = -1) const { return GetAsUInt32Strict(UIntValue, base); }  |
160 | bool GetAsUInt32Strict(uint32& UInt32Value, int base = -1) const { return tStd::tStrtoui32Strict(TextData, UInt32Value, base); }  |
161 | bool GetAsUInt64Strict(uint64& UInt64Value, int base = -1) const { return tStd::tStrtoui64Strict(TextData, UInt64Value, base); }  |
162 |   |
163 | // Case insensitive. Interprets "true", "t", "yes", "y", "on", "enable", "enabled", "1", "+", and strings that  |
164 | // represent non-zero integers as boolean true. Otherwise false.  |
165 | bool GetAsBool() const { return tStd::tStrtob(TextData); }  |
166 |   |
167 | float GetAsFloat() const /* Base 10 interpretation only. */ { return tStd::tStrtof(TextData); }  |
168 | double GetAsDouble() const /* Base 10 interpretation only. */ { return tStd::tStrtod(TextData); }  |
169 |   |
170 | // Shorter synonyms.  |
171 | int AsInt(int base = -1) const { return GetAsInt(base); }  |
172 | int AsInt32(int base = -1) const { return GetAsInt32(base); }  |
173 | int64 AsInt64(int base = -1) const { return GetAsInt64(base); }  |
174 | uint AsUInt(int base = -1) const { return GetAsUInt(base); }  |
175 | uint AsUInt32(int base = -1) const { return GetAsUInt32(base); }  |
176 | uint64 AsUInt64(int base = -1) const { return GetAsUInt64(base); }  |
177 | bool AsBool() const { return GetAsBool(); }  |
178 | float AsFloat() const { return GetAsFloat(); }  |
179 |   |
180 | protected:  |
181 | char* TextData;  |
182 | static char EmptyChar; // All empty strings can use this.  |
183 | };  |
184 |   |
185 |   |
186 | // Binary operator overloads should be outside the class so we can do things like if ("a" == b) where b is a tString.  |
187 | inline bool operator==(const tString& a, const tString& b) { return !tStd::tStrcmp(a.Chars(), b.Chars()); }  |
188 | inline bool operator!=(const tString& a, const tString& b) { return !!tStd::tStrcmp(a.Chars(), b.Chars()); }  |
189 | inline bool operator==(const tString& a, const char* b) { return !tStd::tStrcmp(a.Chars(), b); }  |
190 | inline bool operator!=(const tString& a, const char* b) { return !!tStd::tStrcmp(a.Chars(), b); }  |
191 | inline bool operator==(const char* a, const tString& b) { return !tStd::tStrcmp(a, b.Chars()); }  |
192 | inline bool operator!=(const char* a, const tString& b) { return !!tStd::tStrcmp(a, b.Chars()); }  |
193 |   |
194 |   |
195 |   |
196 | // The tStringItem class is just the tString class except they can be placed on tLists.  |
197 | struct tStringItem : public tLink<tStringItem>, public tString  |
198 | {  |
199 | public:  |
200 | tStringItem() : tString() { }  |
201 |   |
202 | // The tStringItem copy cons is missing, because as a list item can only be on one list at a time.  |
203 | tStringItem(const tString& s) : tString(s) { }  |
204 | tStringItem(int length) : tString(length) { }  |
205 | tStringItem(const char* c) : tString(c) { }  |
206 | tStringItem(char c) : tString(c) { }  |
207 |   |
208 | // This call does NOT change the list that the tStringItem is on. The link remains unmodified.  |
209 | tStringItem& operator=(const tStringItem&);  |
210 | };  |
211 |   |
212 |   |
// Utility functions that act on tStrings. Only the declarations live in this header.
namespace tStd
{
	// Separates the src string into components based on the divider and appends each component to the supplied
	// components list. For example, if src is "abc_def_ghi" and the divider is '_', the components "abc", "def", and
	// "ghi" are appended. Returns the number of components appended to the components list.
	//
	// The overload that takes a tString divider allows for multicharacter dividers.
	//
	// Empty components are preserved: "abc__def_ghi" explodes to "abc", "", "def", and "ghi". This makes it possible
	// to explode a text file on linefeeds and get one item per line, even for lines that contain only a linefeed.
	int tExplode(tList<tStringItem>& components, const tString& src, char divider = '_');
	int tExplode(tList<tStringItem>& components, const tString& src, const tString& divider);
}
224 |   |
225 |   |
226 | // Implementation below this line.  |
227 |   |
228 |   |
229 | inline tString::tString(const char* t)  |
230 | {  |
231 | if (t)  |
232 | {  |
233 | int len = int(tStd::tStrlen(t));  |
234 | if (len > 0)  |
235 | {  |
236 | TextData = new char[1 + len];  |
237 | tStd::tStrcpy(TextData, t);  |
238 | return;  |
239 | }  |
240 | }  |
241 |   |
242 | TextData = &EmptyChar;  |
243 | }  |
244 |   |
245 |   |
246 | inline tString::tString(const tString& s)  |
247 | {  |
248 | TextData = new char[1 + tStd::tStrlen(s.TextData)];  |
249 | tStd::tStrcpy(TextData, s.TextData);  |
250 | }  |
251 |   |
252 |   |
253 | inline tString::tString(char c)  |
254 | {  |
255 | TextData = new char[2];  |
256 | TextData[0] = c;  |
257 | TextData[1] = '\0';  |
258 | }  |
259 |   |
260 |   |
261 | inline tString::tString(int length)  |
262 | {  |
263 | if (!length)  |
264 | {  |
265 | TextData = &EmptyChar;  |
266 | }  |
267 | else  |
268 | {  |
269 | TextData = new char[1+length];  |
270 | tStd::tMemset(TextData, 0, 1+length);  |
271 | }  |
272 | }  |
273 |   |
274 |   |
275 | inline void tString::Reserve(int length)  |
276 | {  |
277 | if (TextData != &EmptyChar)  |
278 | delete[] TextData;  |
279 |   |
280 | if (length <= 0)  |
281 | {  |
282 | TextData = &EmptyChar;  |
283 | return;  |
284 | }  |
285 |   |
286 | TextData = new char[length+1];  |
287 | tStd::tMemset(TextData, 0, length+1);  |
288 | }  |
289 |   |
290 |   |
291 | inline int tString::CountChar(char c) const  |
292 | {  |
293 | char* i = TextData;  |
294 | int count = 0;  |
295 | while (*i != '\0')  |
296 | count += (*i++ == c) ? 1 : 0;  |
297 |   |
298 | return count;  |
299 | }  |
300 |   |
301 |   |
302 | inline void tString::Set(const char* s)  |
303 | {  |
304 | Clear();  |
305 | if (!s)  |
306 | return;  |
307 |   |
308 | int len = tStd::tStrlen(s);  |
309 | if (len <= 0)  |
310 | return;  |
311 |   |
312 | TextData = new char[1 + len];  |
313 | tStd::tStrcpy(TextData, s);  |
314 | }  |
315 |   |
316 |   |
317 | inline tString& tString::operator=(const tString& src)  |
318 | {  |
319 | if (this == &src)  |
320 | return *this;  |
321 |   |
322 | if (TextData != &EmptyChar)  |
323 | delete[] TextData;  |
324 |   |
325 | TextData = new char[1 + src.Length()];  |
326 | tStd::tStrcpy(TextData, src.TextData);  |
327 | return *this;  |
328 | }  |
329 |   |
330 |   |
331 | inline tString operator+(const tString& preStr, const tString& sufStr)  |
332 | {  |
333 | tString buf( preStr.Length() + sufStr.Length() );  |
334 | tStd::tStrcpy(buf.TextData, preStr.TextData);  |
335 | tStd::tStrcpy(buf.TextData + preStr.Length(), sufStr.TextData);  |
336 |   |
337 | return buf;  |
338 | }  |
339 |   |
340 |   |
341 | inline tString& tString::operator+=(const tString& sufStr)  |
342 | {  |
343 | if (sufStr.IsEmpty())  |
344 | return *this;  |
345 | else  |
346 | {  |
347 | char* newTextData = new char[ Length() + sufStr.Length() + 1 ];  |
348 | tStd::tStrcpy(newTextData, TextData);  |
349 | tStd::tStrcpy(newTextData + Length(), sufStr.TextData);  |
350 |   |
351 | if (TextData != &EmptyChar)  |
352 | delete[] TextData;  |
353 |   |
354 | TextData = newTextData;  |
355 | return *this;  |
356 | }  |
357 | }  |
358 |   |
359 |   |
360 | inline bool tString::IsAlphabetic(bool includeUnderscore) const   |
361 | {  |
362 | if (TextData == &EmptyChar)  |
363 | return false;  |
364 |   |
365 | const char* c = TextData;  |
366 | while (*c)  |
367 | {  |
368 | if ( !((*c >= 'A' && *c <= 'Z') || (*c >= 'a' && *c <= 'z') || (includeUnderscore && *c == '_')) )  |
369 | return false;  |
370 | c++;  |
371 | }  |
372 |   |
373 | return true;  |
374 | }  |
375 |   |
376 |   |
377 | inline bool tString::IsNumeric(bool includeDecimal) const   |
378 | {  |
379 | if (TextData == &EmptyChar)  |
380 | return false;  |
381 |   |
382 | const char* c = TextData;  |
383 | while (*c)  |
384 | {  |
385 | if ( !((*c >= '0' && *c <= '9') || (includeDecimal && *c == '.')) )  |
386 | return false;  |
387 | c++;  |
388 | }  |
389 |   |
390 | return true;  |
391 | }  |
392 |   |
393 |   |
394 | inline bool tString::IsAlphaNumeric(bool includeUnderscore, bool includeDecimal) const  |
395 | {  |
396 | return (IsAlphabetic(includeUnderscore) || IsNumeric(includeDecimal));  |
397 | }  |
398 |   |
399 |   |
400 | inline int tString::FindAny(const char* chars) const  |
401 | {  |
402 | if (TextData == &EmptyChar)  |
403 | return -1;  |
404 |   |
405 | int i = 0;  |
406 | while (TextData[i])  |
407 | {  |
408 | int j = 0;  |
409 | while (chars[j])  |
410 | {  |
411 | if (chars[j] == TextData[i])  |
412 | return i;  |
413 | j++;  |
414 | }  |
415 | i++;  |
416 | }  |
417 | return -1;  |
418 | }  |
419 |   |
420 |   |
421 | inline int tString::FindChar(const char c, bool reverse, int start) const  |
422 | {  |
423 | const char* pc = nullptr;  |
424 |   |
425 | if (start == -1)  |
426 | {  |
427 | if (reverse)  |
428 | start = Length() - 1;  |
429 | else  |
430 | start = 0;  |
431 | }  |
432 |   |
433 | if (reverse)  |
434 | {  |
435 | for (int i = start; i >= 0; i--)  |
436 | if (TextData[i] == c)  |
437 | {  |
438 | pc = TextData + i;  |
439 | break;  |
440 | }  |
441 | }  |
442 | else  |
443 | pc = tStd::tStrchr(&TextData[start], c);  |
444 |   |
445 | if (!pc)  |
446 | return -1;  |
447 |   |
448 | // Returns the index.  |
449 | return int(pc - TextData);  |
450 | }  |
451 |   |
452 |   |
453 | inline int tString::FindString(const char* s, int start) const  |
454 | {  |
455 | int len = Length();  |
456 | if (!len)  |
457 | return -1;  |
458 |   |
459 | tAssert((start >= 0) && (start < Length()));  |
460 | const char* found = tStd::tStrstr(&TextData[start], s);  |
461 | if (found)  |
462 | return int(found - TextData);  |
463 |   |
464 | return -1;  |
465 | }  |
466 |   |
467 |   |
468 | inline int tString::Replace(const char c, const char r)  |
469 | {  |
470 | int numReplaced = 0;  |
471 | for (int i = 0; i < Length(); i++)  |
472 | {  |
473 | if (TextData[i] == c)  |
474 | {  |
475 | numReplaced++;  |
476 | TextData[i] = r;  |
477 | }  |
478 | }  |
479 |   |
480 | return numReplaced;  |
481 | }  |
482 |   |
483 |   |
484 | inline tString::~tString()  |
485 | {  |
486 | if (TextData != &EmptyChar)  |
487 | delete[] TextData;  |
488 | }  |
489 |   |
490 |   |
491 | inline tStringItem& tStringItem::operator=(const tStringItem& src)  |
492 | {  |
493 | if (this == &src)  |
494 | return *this;  |
495 |   |
496 | if (TextData != &EmptyChar)  |
497 | delete[] TextData;  |
498 |   |
499 | TextData = new char[1 + src.Length()];  |
500 | tStd::tStrcpy(TextData, src.TextData);  |
501 | return *this;  |
502 | }  |
503 | |