-
Notifications
You must be signed in to change notification settings - Fork 1
/
util.cpp
2317 lines (2081 loc) · 109 KB
/
util.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
AutoHotkey
Copyright 2003-2008 Chris Mallett (support@autohotkey.com)
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
*/
#include "stdafx.h" // pre-compiled headers
#include <olectl.h> // for OleLoadPicture()
#include "msc_headers/GdiPlus.h" // Used by LoadPicture().
#include "util.h"
#include "globaldata.h"
int GetYDay(int aMon, int aDay, bool aIsLeapYear)
// Converts a month/day pair into the ordinal day of the year (1 through 366).
// Caller must verify that aMon is between 1 and 12 and aDay is between 1 and 31; neither
// is checked here, so an out-of-range month would index outside the lookup table.
{
	// Count of days preceding the first day of each month.
	// Row 0 is for a normal year, row 1 for a leap year.
	static const int sDaysBeforeMonth[2][12] =
	{
		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334},
		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335}
	};
	const int *offsets = aIsLeapYear ? sDaysBeforeMonth[1] : sDaysBeforeMonth[0];
	return offsets[aMon - 1] + aDay; // aMon is one-based but the table is zero-based.
}
#define ISO_WEEK_START_WDAY 1 // Monday
#define ISO_WEEK1_WDAY 4 // Thursday

static int IsoWeekDays(int aYDay, int aWDay)
// Helper for GetISOWeekNumber(), based on Linux glibc source code (GPL).
// aYDay is a zero-based day of the year (it may be shifted by a whole year in either direction)
// and aWDay is the day of the week (0-6). The result is negative when the day belongs to an
// ISO week of the preceding year; otherwise (result / 7) is the zero-based ISO week index.
{
	// (366 / 7 + 2) * 7 == 378 is a multiple of 7 that is large enough to keep the left operand
	// of % non-negative even when aYDay has been shifted back by a full year.
	return aYDay - (aYDay - aWDay + ISO_WEEK1_WDAY + ((366 / 7 + 2) * 7)) % 7
		+ ISO_WEEK1_WDAY - ISO_WEEK_START_WDAY;
}

// Retained under its historical name in case anything later in this file refers to it:
#define ISO_WEEK_DAYS(yday, wday) IsoWeekDays((yday), (wday))

int GetISOWeekNumber(char *aBuf, int aYear, int aYDay, int aWDay)
// Writes the ISO week number into aBuf in YYYYNN format (e.g. 200501) and returns the value
// returned by snprintf(), i.e. the length of the string produced.
// Caller must ensure that aBuf is of size 7 or greater, that aYear is a valid year (e.g. 2005),
// that aYDay is between 1 and 366, and that aWDay is between 0 and 6 (day of the week).
// The year is part of the output because the ISO year isn't necessarily the same as aYear:
// the first days of January can belong to the last week of the previous year, and the last
// days of December can belong to week 1 of the next year.
{
	const int zero_based_yday = aYDay - 1;
	int iso_year = aYear;
	int days = IsoWeekDays(zero_based_yday, aWDay);

	if (days < 0)
	{
		// This ISO week belongs to the previous year, so measure from the start of that year.
		--iso_year;
		days = IsoWeekDays(zero_based_yday + (365 + IS_LEAP_YEAR(iso_year)), aWDay);
	}
	else
	{
		// See whether the day already falls into week 1 of the following year.
		const int days_into_next_year = IsoWeekDays(zero_based_yday - (365 + IS_LEAP_YEAR(iso_year)), aWDay);
		if (days_into_next_year >= 0)
		{
			++iso_year;
			days = days_into_next_year;
		}
	}

	// snprintf() is used for safety in case the year is longer than 4 digits. The format also
	// supplies any leading zeros needed in front of the year and the week number.
	return snprintf(aBuf, 7, "%04d%02d", iso_year, (days / 7) + 1);
}
ResultType YYYYMMDDToFileTime(char *aYYYYMMDD, FILETIME &aFileTime)
// Converts a YYYYMMDDHH24MISS string into aFileTime. Returns OK on success and FAIL otherwise.
{
	SYSTEMTIME parsed;
	// Validation is skipped at this stage ("false") because the conversion below performs it.
	YYYYMMDDToSystemTime(aYYYYMMDD, parsed, false);

	// SystemTimeToFileTime() fails if aYYYYMMDD contained any invalid elements, such as an
	// explicit zero for the day of the month. It also fails if the year is less than 1601,
	// which for simplicity is enforced globally throughout the program since none of the
	// Windows API calls seem to support earlier years. The wDayOfWeek member is ignored by it.
	if (SystemTimeToFileTime(&parsed, &aFileTime))
		return OK;
	return FAIL;
}
DWORD YYYYMMDDToSystemTime2(char *aYYYYMMDD, SYSTEMTIME *aSystemTime)
// Parses a date range of the form "min", "min-max", "min-" or "-max", where each part is in
// YYYYMMDDHH24MISS format, by calling YYYYMMDDToSystemTime() for each part that is present.
// aSystemTime must point to an array of at least two elements: [0] receives the minimum and
// [1] receives the maximum.
// Returns a bitwise combination of GDTR_MIN and GDTR_MAX to indicate which of the two elements
// are present and valid (0 if neither).
// Caller must ensure that aYYYYMMDD is non-NULL and is a modifiable string, since it's
// temporarily altered and restored here.
{
	DWORD gdtr = 0;
	if (!*aYYYYMMDD)
		return gdtr;

	// Start of the maximum's text, or NULL when no maximum was specified. A NULL pointer is used
	// rather than pointing at a "" literal because a string literal isn't convertible to char*.
	char *max_part = NULL;

	if (*aYYYYMMDD != '-') // Since first char isn't a dash, there is a minimum present.
	{
		char *dash = strchr(aYYYYMMDD + 1, '-');
		if (dash)
			*dash = '\0'; // Temporarily terminate so that the dash and what follows aren't seen as invalid fields.
		if (YYYYMMDDToSystemTime(aYYYYMMDD, aSystemTime[0], true)) // Date string is valid.
			gdtr |= GDTR_MIN; // Indicate that minimum is present.
		if (dash)
		{
			*dash = '-'; // Undo the temporary termination.
			max_part = dash + 1; // The maximum, if any, begins right after the dash.
		}
		// Otherwise there is no dash and thus no maximum, so max_part stays NULL.
	}
	else // Only the maximum is present; thus there will be no minimum.
		max_part = aYYYYMMDD + 1; // Skip over the dash.

	if (max_part && *max_part) // There is a maximum.
	{
		if (YYYYMMDDToSystemTime(max_part, aSystemTime[1], true)) // Date string is valid.
			gdtr |= GDTR_MAX; // Indicate that maximum is present.
	}
	return gdtr;
}
ResultType YYYYMMDDToSystemTime(char *aYYYYMMDD, SYSTEMTIME &aSystemTime, bool aDoValidate)
// Although aYYYYMMDD need not be terminated at the end of the YYYYMMDDHH24MISS string (as long as
// the string's capacity is at least 14), it should be terminated if only the leading part
// of the YYYYMMDDHH24MISS format is present.
// Caller must ensure that aYYYYMMDD is non-NULL. If aDoValidate is false, OK is always
// returned and aSystemTime might contain invalid elements. Otherwise, FAIL will be returned
// if the date and time contains any invalid elements, or if the year is less than 1601
// (Windows generally does not support earlier years).
{
// sscanf() is avoided because it adds 2 KB to the compressed EXE size.
char temp[16];
size_t length = strlen(aYYYYMMDD); // Use this rather than incrementing the pointer in case there are ever partial fields such as 20051 vs. 200501.
strlcpy(temp, aYYYYMMDD, 5);
aSystemTime.wYear = atoi(temp);
if (length > 4) // It has a month component.
{
strlcpy(temp, aYYYYMMDD + 4, 3);
aSystemTime.wMonth = atoi(temp); // Unlike "struct tm", SYSTEMTIME uses 1 for January, not 0.
if (aSystemTime.wMonth < 1 || aSystemTime.wMonth > 12) // v1.0.46.07: Must validate month since it's used to access an array further below.
aSystemTime.wMonth = 1; // For simplicity and due to extreme rarity, just choose an in-range value.
}
else
aSystemTime.wMonth = 1;
if (length > 6) // It has a day-of-month component.
{
strlcpy(temp, aYYYYMMDD + 6, 3);
aSystemTime.wDay = atoi(temp);
}
else
aSystemTime.wDay = 1;
if (length > 8) // It has an hour component.
{
strlcpy(temp, aYYYYMMDD + 8, 3);
aSystemTime.wHour = atoi(temp);
}
else
aSystemTime.wHour = 0; // Midnight.
if (length > 10) // It has a minutes component.
{
strlcpy(temp, aYYYYMMDD + 10, 3);
aSystemTime.wMinute = atoi(temp);
}
else
aSystemTime.wMinute = 0;
if (length > 12) // It has a seconds component.
{
strlcpy(temp, aYYYYMMDD + 12, 3);
aSystemTime.wSecond = atoi(temp);
}
else
aSystemTime.wSecond = 0;
aSystemTime.wMilliseconds = 0; // Always set to zero in this case.
// Day-of-week code by Tomohiko Sakamoto:
static int t[] = {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4};
int y = aSystemTime.wYear;
y -= aSystemTime.wMonth < 3;
aSystemTime.wDayOfWeek = (y + y/4 - y/100 + y/400 + t[aSystemTime.wMonth-1] + aSystemTime.wDay) % 7;
if (aDoValidate)
{
FILETIME ft;
// This will return failure if aYYYYMMDD contained any invalid elements, such as an
// explicit zero for the day of the month. It also reports failure if st.wYear is
// less than 1601, which for simplicity is enforced globally throughout the program
// since none of the Windows API calls seem to support earlier years.
return SystemTimeToFileTime(&aSystemTime, &ft) ? OK : FAIL;
// Above: The st.wDayOfWeek member is ignored, but that's okay since only the YYYYMMDDHH24MISS part
// needs validation.
}
return OK;
}
char *FileTimeToYYYYMMDD(char *aBuf, FILETIME &aTime, bool aConvertToLocalTime)
// Writes aTime into aBuf in YYYYMMDDHH24MISS format and returns aBuf.
// If aConvertToLocalTime is true, aTime is first passed through FileTimeToLocalFileTime().
// If any of the conversions fail, aBuf is made empty.
// aBuf must be large enough for SystemTimeToYYYYMMDD() to write its result into.
{
	FILETIME ft;
	if (aConvertToLocalTime)
	{
		// MSDN says that target cannot be the same var as source.
		if (!FileTimeToLocalFileTime(&aTime, &ft))
		{
			// ft was not filled in, so it must not be used below.
			*aBuf = '\0';
			return aBuf;
		}
	}
	else
		memcpy(&ft, &aTime, sizeof(FILETIME)); // memcpy() might be less code size than a struct assignment, ft = aTime.
	SYSTEMTIME st;
	if (FileTimeToSystemTime(&ft, &st))
		return SystemTimeToYYYYMMDD(aBuf, st);
	*aBuf = '\0';
	return aBuf;
}
char *SystemTimeToYYYYMMDD(char *aBuf, SYSTEMTIME &aTime)
// Returns aBuf.
// Remember not to offer a "aConvertToLocalTime" option, because calling SystemTimeToTzSpecificLocalTime()
// on Win9x apparently results in an invalid time because the function is implemented only as a stub on
// those OSes.
{
sprintf(aBuf, "%04d%02d%02d" "%02d%02d%02d"
, aTime.wYear, aTime.wMonth, aTime.wDay
, aTime.wHour, aTime.wMinute, aTime.wSecond);
return aBuf;
}
__int64 YYYYMMDDSecondsUntil(char *aYYYYMMDDStart, char *aYYYYMMDDEnd, bool &aFailed)
// Returns the number of seconds from aYYYYMMDDStart until aYYYYMMDDEnd.
// If either string is blank, the current local time is used in its place.
// aFailed is set to true (and 0 is returned) if either pointer is NULL or either non-blank
// string can't be converted by YYYYMMDDToFileTime(); otherwise it is set to false.
{
	aFailed = true; // Set default for output parameter, in case of early return.
	if (!aYYYYMMDDStart || !aYYYYMMDDEnd)
		return 0;

	// Index 0 is the start of the interval and index 1 is its end; both are resolved the same way.
	char *timestamp[2] = {aYYYYMMDDStart, aYYYYMMDDEnd};
	FILETIME ft[2];
	for (int i = 0; i < 2; ++i)
	{
		if (*timestamp[i])
		{
			if (!YYYYMMDDToFileTime(timestamp[i], ft[i]))
				return 0;
		}
		else // Use the current time in its place.
		{
			FILETIME now_utc;
			GetSystemTimeAsFileTime(&now_utc);
			FileTimeToLocalFileTime(&now_utc, &ft[i]); // Convert UTC to local time.
		}
	}

	aFailed = false; // Indicate success.
	return FileTimeSecondsUntil(&ft[0], &ft[1]);
}
__int64 FileTimeSecondsUntil(FILETIME *pftStart, FILETIME *pftEnd)
// Returns the number of whole seconds from pftStart until pftEnd, which is negative when
// pftStart is later than pftEnd. Returns 0 if either pointer is NULL.
{
	if (!pftStart || !pftEnd)
		return 0;

	// Assemble each FILETIME's two 32-bit halves into a single unsigned 64-bit tick count.
	const unsigned __int64 start_ticks = ((unsigned __int64)pftStart->dwHighDateTime << 32) | pftStart->dwLowDateTime;
	const unsigned __int64 end_ticks = ((unsigned __int64)pftEnd->dwHighDateTime << 32) | pftEnd->dwLowDateTime;

	// The unsigned difference must be converted to signed BEFORE dividing; otherwise negative
	// results (start later than end) would be lost.
	const __int64 elapsed_ticks = (__int64)(end_ticks - start_ticks);
	return elapsed_ticks / 10000000; // Convert from tenths-of-microsecond.
}
SymbolType IsPureNumeric(char *aBuf, BOOL aAllowNegative, BOOL aAllowAllWhitespace
	, BOOL aAllowFloat, BOOL aAllowImpure) // BOOL vs. bool might squeeze a little more performance out of this frequently-called function.
// Classifies the text in aBuf as PURE_INTEGER, PURE_FLOAT or PURE_NOT_NUMERIC.
// The string can contain leading and trailing whitespace (spaces/tabs).
// Accepted forms: an optional sign ('-' only if aAllowNegative is true; '+' always), then either
// decimal digits with at most one decimal point, or a hexadecimal number as detected by IS_HEX().
// Scientific notation (e.g. 1.0e4) is accepted only when there is a decimal point and at least
// one digit to the left of the 'E', and a digit follows the 'E' (and its optional sign).
// Parameters:
//   aAllowNegative:      If false, a leading '-' makes the result PURE_NOT_NUMERIC.
//   aAllowAllWhitespace: If true, a string that is blank or all whitespace yields PURE_INTEGER.
//   aAllowFloat:         If false, any decimal point makes the result PURE_NOT_NUMERIC.
//   aAllowImpure:        If true, the string only needs to START with a number; whatever follows
//                        it is ignored (e.g. "123abc" or "123 456" are classified by the "123").
// If aBuf doesn't contain something purely numeric, PURE_NOT_NUMERIC is returned. The same happens if
// aBuf contains a float but aAllowFloat is false. Otherwise, PURE_INTEGER or PURE_FLOAT is returned.
// Obsolete comment: Making this non-inline reduces the size of the compressed EXE by only 2K. Since this
// function is called so often, it seems preferable to keep it inline for performance.
{
	aBuf = omit_leading_whitespace(aBuf); // i.e. caller doesn't have to have ltrimmed, only rtrimmed.
	if (!*aBuf) // The string is empty or consists entirely of whitespace.
		return aAllowAllWhitespace ? PURE_INTEGER : PURE_NOT_NUMERIC;

	if (*aBuf == '-')
	{
		if (aAllowNegative)
			++aBuf;
		else
			return PURE_NOT_NUMERIC;
	}
	else if (*aBuf == '+')
		++aBuf;

	// Relies on short circuit boolean order to prevent reading beyond the end of the string:
	BOOL is_hex = IS_HEX(aBuf); // BOOL vs. bool might squeeze a little more performance out this frequently-called function.
	if (is_hex)
		aBuf += 2; // Skip over the 0x prefix.

	// Set defaults:
	BOOL has_decimal_point = false;
	BOOL has_at_least_one_digit = false; // i.e. a string consisting of only "+", "-" or "." is not considered numeric.
	char c;

	for (;; ++aBuf)
	{
		c = *aBuf;
		if (IS_SPACE_OR_TAB(c))
		{
			if (*omit_leading_whitespace(aBuf)) // But that space or tab is followed by something other than whitespace.
				if (!aAllowImpure) // e.g. "123 456" is not a valid pure number.
					return PURE_NOT_NUMERIC;
				// else fall through to the bottom logic.
			// else since just whitespace at the end, the number qualifies as pure, so fall through to the bottom
			// logic (it would already have returned in the loop if it was impure)
			break;
		}
		if (!c) // End of string was encountered.
			break; // The number qualifies as pure, so fall through to the logic at the bottom. (It would already have returned elsewhere in the loop if the number is impure).
		if (c == '.')
		{
			if (!aAllowFloat || has_decimal_point || is_hex)
				// i.e. if aBuf contains 2 decimal points, it can't be a valid number.
				// Note that decimal points are allowed in hexadecimal strings, e.g. 0xFF.EE.
				// But since that format doesn't seem to be supported by VC++'s atof() and probably
				// related functions, and since it's extremely rare, it seems best not to support it.
				return PURE_NOT_NUMERIC;
			else
				has_decimal_point = true;
		}
		else
		{
			// The casts to unsigned char are required because passing a negative value (i.e. a
			// character above 127 when char is signed) to isxdigit()/toupper() is undefined.
			if (is_hex ? !isxdigit((unsigned char)c) : (c < '0' || c > '9')) // And since we're here, it's not '.' either.
			{
				if (aAllowImpure) // Since aStr starts with a number (as verified above), it is considered a number.
				{
					if (has_at_least_one_digit)
						return has_decimal_point ? PURE_FLOAT : PURE_INTEGER;
					else // i.e. the strings "." and "-" are not considered to be numeric by themselves.
						return PURE_NOT_NUMERIC;
				}
				else
				{
					// As written below, this actually tolerates malformed scientific notation such as numbers
					// containing two or more E's (e.g. 1.0e4e+5e-6,). But for performance and due to rarity,
					// it seems best not to check for them.
					if (toupper((unsigned char)c) != 'E' // v1.0.46.11: Support scientific notation in floating point numbers.
						|| !(has_decimal_point && has_at_least_one_digit)) // But it must have a decimal point and at least one digit to the left of the 'E'. This avoids variable names like "1e4" from being seen as sci-notation literals (for backward compatibility). Some callers rely on this check.
						return PURE_NOT_NUMERIC;
					if (aBuf[1] == '-' || aBuf[1] == '+') // The optional sign is present on the exponent.
						++aBuf; // Omit it from further consideration so that the outer loop doesn't see it as an extra/illegal sign.
					if (aBuf[1] < '0' || aBuf[1] > '9')
						// Even if it is an 'e', ensure what follows it is a valid exponent. Some callers rely
						// on this check, such as ones that expect "0.6e" to be non-numeric (for "SetFormat Float")
						return PURE_NOT_NUMERIC;
				}
			}
			else // This character is a valid digit or hex-digit.
				has_at_least_one_digit = true;
		}
	} // for()

	if (has_at_least_one_digit)
		return has_decimal_point ? PURE_FLOAT : PURE_INTEGER;
	else
		return PURE_NOT_NUMERIC; // i.e. the strings "+" "-" and "." are not numeric by themselves.
}
void strlcpy(char *aDst, const char *aSrc, size_t aDstSize) // Non-inline because it benches slightly faster that way.
// Copies at most aDstSize - 1 characters from aSrc into aDst and always zero-terminates aDst.
// In other words, it is the same as strncpy() except that termination is guaranteed.
// Caller must ensure that:
//  1) aDst and aSrc are non-NULL and aDstSize is greater than 0 (none of these are checked here,
//     for performance and reduced code size).
//  2) The entire capacity of aDst (aDstSize bytes) is writable, EVEN WHEN aSrc is known to be much
//     shorter. This is because strncpy(), which is used for its superior performance, zero-fills
//     any unused portion of aDst.
// A function of this name is defined in some Unices but is not standard. Unlike those versions,
// this one returns void for reduced code size (since it's called in so many places).
// It might be worthwhile to have a custom char-copying loop here someday so that the number of
// characters actually copied can be returned to callers who want it.
{
	const size_t max_chars = aDstSize - 1; // Capacity excluding the zero terminator.
	// In spite of its zero-filling, strncpy() benchmarks considerably faster than a custom loop,
	// probably because it uses 32-bit memory operations vs. 8-bit.
	strncpy(aDst, aSrc, max_chars);
	*(aDst + max_chars) = '\0'; // strncpy() doesn't terminate when aSrc has max_chars or more characters.
}
int snprintf(char *aBuf, int aBufSize, const char *aFormat, ...)
// Formats text into aBuf (capacity aBufSize, including the terminator) via _vsnprintf().
// aBufSize is an int so that any negative values passed in from caller are not lost.
// aBuf will always be terminated here except when aBufSize is <= zero (in which case the caller should
// already have terminated it). If aBufSize is greater than zero but not large enough to hold the
// entire result, as much of the result as possible is copied and the return value is aBufSize - 1.
// Returns the exact number of characters written, not including the zero terminator. A negative
// number is never returned, even if aBufSize is <= zero (which means there isn't even enough space left
// to write a zero terminator), under the assumption that the caller has already terminated the string
// and thus prefers to have 0 rather than -1 returned in such cases. 0 is also returned, and nothing
// is written, if aBuf or aFormat is NULL.
// MSDN says (about _snprintf(), and testing shows that it applies to _vsnprintf() too): "This function
// does not guarantee NULL termination, so ensure it is followed by sz[size - 1] = 0".
{
	// The following should probably never be changed without a full suite of tests to ensure the
	// change doesn't cause the finicky _vsnprintf() to break something.
	if (aBufSize < 1 || !aBuf || !aFormat) return 0; // It's called from so many places that the extra checks seem warranted.
	va_list ap;
	va_start(ap, aFormat);
	// Must use _vsnprintf() not _snprintf() because of the way va_list is handled:
	int result = _vsnprintf(aBuf, aBufSize, aFormat, ap); // "returns the number of characters written, not including the terminating null character, or a negative value if an output error occurs"
	va_end(ap); // Every va_start() must be paired with a va_end() in the same function.
	aBuf[aBufSize - 1] = '\0'; // Confirmed through testing: Must terminate at this exact spot because _vsnprintf() doesn't always do it.
	// Fix for v1.0.34: If result==aBufSize, must reduce result by 1 to return an accurate result to the
	// caller. In other words, if the line above turned the last character into a terminator, one less character
	// is now present in aBuf.
	if (result == aBufSize)
		--result;
	return result > -1 ? result : aBufSize - 1; // Never return a negative value. See comment under function definition, above.
}
int snprintfcat(char *aBuf, int aBufSize, const char *aFormat, ...)
// Appends formatted text to the string already in aBuf, whose total capacity is aBufSize.
// aBufSize is an int so that any negative values passed in from caller are not lost.
// aBuf will always be terminated here except when the amount of space left in the buffer is zero or less.
// (in which case the caller should already have terminated it). If the space remaining is greater than
// zero but not large enough to hold the entire result, as much of the result as possible is copied and
// the return value is space_remaining - 1.
// The caller must have ensured that aBuf and aFormat are non-NULL and that aBuf contains a valid string
// (i.e. that it is null-terminated somewhere within the limits of aBufSize).
// Returns the exact number of characters written, not including the zero terminator. A negative
// number is never returned, even if aBufSize is <= zero (which means there isn't even enough space left
// to write a zero terminator), under the assumption that the caller has already terminated the string
// and thus prefers to have 0 rather than -1 returned in such cases.
{
	// The following should probably never be changed without a full suite of tests to ensure the
	// change doesn't cause the finicky _vsnprintf() to break something.
	size_t length = strlen(aBuf);
	int space_remaining = (int)(aBufSize - length); // Must cast to int to avoid loss of negative values.
	if (space_remaining < 1) // Can't even terminate it (no room) so just indicate that no characters were copied.
		return 0;
	aBuf += length; // aBuf is now the spot where the new text will be written.
	va_list ap;
	va_start(ap, aFormat);
	// Must use vsnprintf() not snprintf() because of the way va_list is handled:
	int result = _vsnprintf(aBuf, (size_t)space_remaining, aFormat, ap); // "returns the number of characters written, not including the terminating null character, or a negative value if an output error occurs"
	va_end(ap); // Every va_start() must be paired with a va_end() in the same function.
	aBuf[space_remaining - 1] = '\0'; // Confirmed through testing: Must terminate at this exact spot because _vsnprintf() doesn't always do it.
	// Same adjustment that snprintf() makes: if result==space_remaining, the text filled the space
	// exactly and the line above turned its last character into the terminator, so one less
	// character is now present than _vsnprintf() reported.
	if (result == space_remaining)
		--result;
	return result > -1 ? result : space_remaining - 1; // Never return a negative value. See comment under function definition, above.
}
// Not currently used by anything, so commented out to possibly reduce code size:
//int strlcmp(char *aBuf1, char *aBuf2, UINT aLength1, UINT aLength2)
//// Case sensitive version. See strlicmp() comments below.
//{
// if (!aBuf1 || !aBuf2) return 0;
// if (aLength1 == UINT_MAX) aLength1 = (UINT)strlen(aBuf1);
// if (aLength2 == UINT_MAX) aLength2 = (UINT)strlen(aBuf2);
// UINT least_length = aLength1 < aLength2 ? aLength1 : aLength2;
// int diff;
// for (UINT i = 0; i < least_length; ++i)
// if ( diff = (int)((UCHAR)aBuf1[i] - (UCHAR)aBuf2[i]) ) // Use unsigned chars like strcmp().
// return diff;
// return (int)(aLength1 - aLength2);
//}
int strlicmp(char *aBuf1, char *aBuf2, UINT aLength1, UINT aLength2)
// Case-insensitive comparison (via toupper()) of the first aLength1 characters of aBuf1 against
// the first aLength2 characters of aBuf2.
// Similar to strnicmp but considers each aBuf to be a string of length aLength if aLength was
// specified. In other words, unlike strnicmp() which would consider strnicmp("ab", "abc", 2)
// [example verified correct] to be a match, this function would consider them to be
// a mismatch. Another way of looking at it: aBuf1 and aBuf2 will be directly
// compared to one another as though they were actually of length aLength1 and
// aLength2, respectively and then passed to stricmp() (not strnicmp) as those
// shorter strings. This behavior is useful for cases where you don't want
// to have to bother with temporarily terminating a string so you can compare
// only a substring to something else. The return value meaning is the
// same as strnicmp(): zero if equal, otherwise negative or positive. If either aLength
// param is UINT_MAX (via the default parameters or via explicit call), it will be assumed
// that the entire length of the respective aBuf will be used.
// Returns 0 if either aBuf is NULL.
{
	if (!aBuf1 || !aBuf2) return 0;
	if (aLength1 == UINT_MAX) aLength1 = (UINT)strlen(aBuf1);
	if (aLength2 == UINT_MAX) aLength2 = (UINT)strlen(aBuf2);
	UINT least_length = aLength1 < aLength2 ? aLength1 : aLength2;
	int diff;
	for (UINT i = 0; i < least_length; ++i)
	{
		// Each character is converted to UCHAR BEFORE being given to toupper() because passing
		// it a negative value (a character above 127 when char is signed) is undefined.
		diff = (int)((UCHAR)toupper((UCHAR)aBuf1[i]) - (UCHAR)toupper((UCHAR)aBuf2[i]));
		if (diff)
			return diff;
	}
	// Since the above didn't return, the strings are equal if they're the same length.
	// Otherwise, the longer one is considered greater than the shorter one since the
	// longer one's next character is by definition something non-zero. I'm not completely
	// sure that this is the same policy followed by ANSI strcmp():
	return (int)(aLength1 - aLength2);
}
char *strrstr(char *aStr, char *aPattern, StringCaseSenseType aStringCaseSense, int aOccurrence)
// Searches aStr from right to left for the aOccurrence-th match of aPattern (1 means the
// rightmost match). After a match that isn't the wanted one, the search resumes to the left
// of where that match begins.
// aStringCaseSense selects the comparison: SCS_INSENSITIVE uses tolower(), SCS_INSENSITIVE_LOCALE
// uses ltolower(), and anything else is case sensitive.
// Returns NULL if not found (or if aOccurrence is less than 1), otherwise the address of the found
// string. If aPattern is empty, the address of aStr's zero terminator is returned.
// Caller must ensure that aStr and aPattern are non-NULL.
// This could probably use a faster algorithm someday. For now it seems adequate because
// scripts rarely use it and when they do, it's usually on short haystack strings (such as
// to find the last period in a filename).
{
	if (aOccurrence < 1)
		return NULL;
	size_t aStr_length = strlen(aStr);
	if (!*aPattern)
		// The empty string is found in every string, and since we're searching from the right, return
		// the position of the zero terminator to indicate the situation:
		return aStr + aStr_length;

	size_t aPattern_length = strlen(aPattern);
	if (aPattern_length > aStr_length)
		// A pattern longer than the haystack can never match. Returning here also guarantees that
		// aStr is non-empty below, so that "aStr + aStr_length - 1" is a position inside aStr.
		return NULL;

	// The casts to unsigned char in the tolower() calls of this function are required because
	// passing tolower() a negative value (a character above 127 when char is signed) is undefined.
	char aPattern_last_char = aPattern[aPattern_length - 1];
	char aPattern_last_char_lower = (aStringCaseSense == SCS_INSENSITIVE_LOCALE)
		? (char)(int)ltolower(aPattern_last_char)
		: (char)tolower((unsigned char)aPattern_last_char);

	int occurrence = 0;
	char *match_starting_pos = aStr + aStr_length - 1;

	// Keep finding matches from the right until the Nth occurrence (specified by the caller) is found.
	for (;;)
	{
		if (match_starting_pos < aStr)
			return NULL; // No further matches are possible.

		// Find (from the right) the first occurrence of aPattern's last char:
		char *last_char_match;
		for (last_char_match = match_starting_pos; last_char_match >= aStr; --last_char_match)
		{
			if (aStringCaseSense == SCS_INSENSITIVE) // The most common mode is listed first for performance.
			{
				if ((char)tolower((unsigned char)*last_char_match) == aPattern_last_char_lower)
					break;
			}
			else if (aStringCaseSense == SCS_INSENSITIVE_LOCALE)
			{
				if ((char)(int)ltolower(*last_char_match) == aPattern_last_char_lower)
					break;
			}
			else // Case sensitive.
			{
				if (*last_char_match == aPattern_last_char)
					break;
			}
		}

		if (last_char_match < aStr) // No further matches are possible.
			return NULL;

		// Now that aPattern's last character has been found in aStr, ensure the rest of aPattern
		// exists in aStr to the left of last_char_match:
		char *full_match, *cp;
		bool found;
		for (found = false, cp = aPattern + aPattern_length - 2, full_match = last_char_match - 1;; --cp, --full_match)
		{
			if (cp < aPattern) // The complete pattern has been found at the position in full_match + 1.
			{
				++full_match; // Adjust for the prior iteration's decrement.
				if (++occurrence == aOccurrence)
					return full_match;
				found = true;
				break;
			}
			if (full_match < aStr) // Only after checking the above is this checked.
				break;
			if (aStringCaseSense == SCS_INSENSITIVE) // The most common mode is listed first for performance.
			{
				if (tolower((unsigned char)*full_match) != tolower((unsigned char)*cp))
					break;
			}
			else if (aStringCaseSense == SCS_INSENSITIVE_LOCALE)
			{
				if (ltolower(*full_match) != ltolower(*cp))
					break;
			}
			else // Case sensitive.
			{
				if (*full_match != *cp)
					break;
			}
		} // for() innermost

		if (found) // Although the above found a match, it wasn't the right one, so resume searching.
			match_starting_pos = full_match - 1;
		else // the pattern broke down, so resume searching at THIS position.
			match_starting_pos = last_char_match - 1; // Don't go back by more than 1.
	} // for() find next match
}
char *strcasestr(const char *phaystack, const char *pneedle)
// Case-insensitive search for pneedle within phaystack, scanning from left to right.
// Returns a pointer (with const cast away) to the start of the match inside phaystack, phaystack
// itself when pneedle is empty, or 0 when there is no match.
// Both arguments must be non-NULL, zero-terminated strings (they are dereferenced unconditionally).
// To make this work with MS Visual C++, this version uses tolower/toupper() in place of
// _tolower/_toupper(), since apparently in GNU C, the underscore macros are identical
// to the non-underscore versions; but in MS the underscore ones do an unconditional
// conversion (mangling non-alphabetic characters such as the zero terminator). MSDN:
// tolower: Converts c to lowercase if appropriate
// _tolower: Converts c to lowercase
// Return the offset of one string within another.
// Copyright (C) 1994,1996,1997,1998,1999,2000 Free Software Foundation, Inc.
// This file is part of the GNU C Library.
// The GNU C Library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The GNU C Library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public
// License along with the GNU C Library; if not, write to the Free
// Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
// 02111-1307 USA.
// My personal strstr() implementation that beats most other algorithms.
// Until someone tells me otherwise, I assume that this is the
// fastest implementation of strstr() in C.
// I deliberately chose not to comment it. You should have at least
// as much fun trying to understand it, as I had to write it :-).
// Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de
// Faster looping by precalculating bl, bu, cl, cu before looping.
// 2004 Apr 08 Jose Da Silva, digital@joescat@com
{
// The strings are walked through unsigned char pointers, so every value handed to
// tolower()/toupper() below is non-negative.
register const unsigned char *haystack, *needle;
// bl/bu: lower- and upper-case forms of needle's first character.
// cl/cu: lower- and upper-case forms of needle's second character (cl is also borrowed as a
// scratch variable by the first scan loop below).
register unsigned bl, bu, cl, cu;
haystack = (const unsigned char *) phaystack;
needle = (const unsigned char *) pneedle;
bl = tolower(*needle);
// When needle is empty, the whole block below is skipped and control falls through to
// foundneedle, which returns the start of haystack.
if (bl != '\0')
{
// Scan haystack until the first character of needle is found:
bu = toupper(bl);
// Step back by one so that the pre-increment in the loop below starts at the first character.
haystack--; /* possible ANSI violation */
do
{
cl = *++haystack;
if (cl == '\0')
goto ret0;
}
while ((cl != bl) && (cl != bu));
// Here, haystack points at a character that matches needle's first character.
// See if the rest of needle is a one-for-one match with this part of haystack:
cl = tolower(*++needle);
if (cl == '\0') // Since needle consists of only one character, it is already a match as found above.
goto foundneedle;
cu = toupper(cl);
// needle now advances to its third character, which is where the remainder comparison starts.
++needle;
// Enter the main loop in the middle, at the point where the second character gets checked.
goto jin;
for (;;)
{
// a: the haystack character currently being examined (later, the lowercased needle character).
register unsigned a;
// rhaystack: comparison cursor within haystack. rneedle: saved value of needle so that it can
// be restored after a comparison attempt.
register const unsigned char *rhaystack, *rneedle;
// Scan forward for the next character that matches needle's first character. The loop body is
// unrolled to examine two haystack characters per iteration.
do
{
a = *++haystack;
if (a == '\0')
goto ret0;
if ((a == bl) || (a == bu))
break;
a = *++haystack;
if (a == '\0')
goto ret0;
// Re-entry point used when a candidate's second character didn't match: the character that was
// just examined (in a) gets tested by the loop condition as a possible first-character match.
shloop:
;
}
while ((a != bl) && (a != bu));
// haystack points at a first-character match; check the character that follows it against
// needle's second character.
jin:
a = *++haystack;
if (a == '\0') // Remaining part of haystack is shorter than needle. No match.
goto ret0;
if ((a != cl) && (a != cu)) // This promising candidate is not a complete match.
goto shloop; // Start looking for another match on the first char of needle.
// The first two characters match. rhaystack is set to the position after them, while haystack
// is moved back onto the first-character match (the candidate's start, returned upon success).
rhaystack = haystack-- + 1;
rneedle = needle;
a = tolower(*rneedle);
// Compare the rest of needle (third character onward) with haystack, lowercasing both sides.
// The loop is unrolled to compare two characters per iteration. Reaching needle's terminator
// means that the entire needle matched.
if (tolower(*rhaystack) == (int) a)
do
{
if (a == '\0')
goto foundneedle;
++rhaystack;
a = tolower(*++needle);
if (tolower(*rhaystack) != (int) a)
break;
if (a == '\0')
goto foundneedle;
++rhaystack;
a = tolower(*++needle);
}
while (tolower(*rhaystack) == (int) a);
needle = rneedle; /* took the register-poor approach */
// If the comparison stopped at needle's terminator, needle was exhausted and thus fully matched
// (haystack merely continues beyond it). Otherwise, loop to look for the next candidate.
if (a == '\0')
break;
} // for(;;)
} // if (bl != '\0')
foundneedle:
return (char*) haystack;
ret0:
return 0;
}
char *lstrcasestr(const char *phaystack, const char *pneedle)
// This is the locale-obeying variant of strcasestr. It uses CharUpper/Lower (via ltoupper/ltolower) in place
// of toupper/lower, which sees chars like ä as the same as Ä (depending on code page/locale).
//
// RETURNS: A pointer to the first position in phaystack at which pneedle matches case-insensitively, or NULL
// if there is no such position. If pneedle is the empty string, phaystack itself is returned.
//
// PARAMETERS:
// - phaystack: The null-terminated string to search. It is never modified here, but the return value is a
//   non-const pointer into it (same contract as strstr).
// - pneedle: The null-terminated string to search for.
//
// MATCHING RULES (identical to the previous goto-based implementation, so results are unchanged):
// - The first two characters of pneedle are converted once to their lower and upper forms, and each raw
//   haystack character is compared against both forms.
// - Every later character is compared by passing both the haystack and needle characters through ltolower.
//
// Unlike the previous implementation, this one never forms a pointer that lies before the start of
// phaystack (the old "haystack--" pre-decrement was undefined behavior), and it doesn't use the "register"
// keyword, which is no longer valid in C++17.
//
// The previous implementation was documented as being about 1 to 8 times slower than strcasestr(),
// depending on factors such as how many partial matches for needle are in haystack; this version does the
// same per-character conversions, so a similar cost should be expected (not re-measured).
// License: GNU GPL
// Copyright (C) 1994,1996,1997,1998,1999,2000 Free Software Foundation, Inc.
// See strcasestr() for more comments.
{
	const unsigned char *haystack = (const unsigned char *) phaystack;
	const unsigned char *needle = (const unsigned char *) pneedle;
	const unsigned char *needle_tail; // Points at the third character of needle (possibly its terminator).
	const unsigned char *h, *n;       // Cursors used while verifying the remainder of a candidate match.
	unsigned bl, bu, cl, cu;          // Lower/upper forms of needle's first (b) and second (c) characters.
	unsigned a, nc;

	bl = (UINT)(size_t)ltolower(*needle); // Double cast avoids compiler warning without increasing code size.
	if (bl == 0) // Needle is empty, which by definition is found at the very start of haystack.
		return (char *) haystack;
	bu = (UINT)(size_t)ltoupper(bl);

	// Scan haystack until the first character of needle is found:
	for (;; ++haystack)
	{
		a = *haystack;
		if (a == '\0') // Checked before the comparison so that the terminator can never be seen as a match.
			return NULL;
		if (a == bl || a == bu)
			break;
	}

	cl = (UINT)(size_t)ltolower(needle[1]);
	if (cl == '\0') // Since needle consists of only one character, it is already a match as found above.
		return (char *) haystack;
	cu = (UINT)(size_t)ltoupper(cl);
	needle_tail = needle + 2;

	// At the top of each iteration, *haystack is known to match the first character of needle.
	for (;;)
	{
		a = haystack[1];
		if (a == '\0') // Remaining part of haystack is shorter than needle. No match.
			return NULL;
		if (a == cl || a == cu)
		{
			// This promising candidate matches the first two characters. See if the rest of needle is a
			// one-for-one match with this part of haystack. Since haystack[1] is non-zero, haystack + 2
			// is still inside the string (at worst it is the terminator).
			h = haystack + 2;
			n = needle_tail;
			for (;;)
			{
				nc = (UINT)(size_t)ltolower(*n);
				if (nc == '\0') // Reached the end of needle without a mismatch: complete match.
					return (char *) haystack;
				if ((UINT)(size_t)ltolower(*h) != nc) // Also catches haystack ending before needle does.
					break;
				++h;
				++n;
			}
		}
		// Not a complete match at this position. Resume one character further on and look for the next
		// occurrence of needle's first character:
		do
		{
			a = *++haystack;
			if (a == '\0')
				return NULL;
		}
		while (a != bl && a != bu);
	}
}
UINT StrReplace(char *aHaystack, char *aOld, char *aNew, StringCaseSenseType aStringCaseSense
, UINT aLimit, size_t aSizeLimit, char **aDest, size_t *aHaystackLength)
// Replaces all (or aLimit) occurrences of aOld with aNew in aHaystack.
// On success, it returns the number of replacements done (0 if none). On failure (out of memory), it returns 0
// (and if aDest isn't NULL, it also sets *aDest to NULL on failure).
//
// PARAMETERS:
// - aLimit: Specify UINT_MAX to have no restriction on the number of replacements. Otherwise, specify a number >=0.
// - aSizeLimit: Specify -1 to assume that aHaystack has enough capacity for any mode #1 replacement. Otherwise,
// specify the size limit (in either mode 1 or 2), but it must be >= length of aHaystack (simplifies the code).
// - aDest: If NULL, the function will operate in mode #1. Otherwise, it uses mode #2 (see further below).
// - aHaystackLength: If it isn't NULL, *aHaystackLength must be the length of aHaystack. HOWEVER, *aHaystackLength
// is then changed here to be the length of the result string so that caller can use it to improve performance.
//
// MODE 1 (when aDest==NULL): aHaystack is used as both the source and the destination (sometimes temporary memory
// is used for performance, but it's freed afterward and so transparent to the caller).
// When it passes in -1 for aSizeLimit (the default), caller must ensure that aHaystack has enough capacity to hold
// the new/replaced result. When aSizeLimit isn't -1, it will be enforced by limiting the number of replacements to
// the available memory (i.e. any remaining replacements are simply not done and that part of haystack is unaltered).
//
// MODE 2 (when aDest!=NULL): If zero replacements are needed, we set *aDest to be aHaystack to indicate that no
// new memory was allocated. Otherwise, we store in *aDest the address of the new memory that holds the result.
// - The caller is responsible for any new memory in *aDest (freeing it, etc.)
// - The initial value of *aDest doesn't matter.
// - The contents of aHaystack isn't altered, not even if aOld_length==aNew_length (some callers rely on this).
//
// v1.0.45: This function was heavily revised to improve performance and flexibility. It has also made
// two other/related StrReplace() functions obsolete. Also, the code has been simplified to avoid doing
// a first pass through haystack to find out exactly how many replacements there are because that step
// nearly doubles the time required for the entire operation (in most cases). Its benefit was mainly in
// memory savings and avoidance of any reallocs since the initial alloc was always exactly right; however,
// testing shows that one or two reallocs are generally much quicker than doing the size-calculation phase
// because extra alloc'ing & memcpy'ing is much faster than an extra search through haystack for all the matches.
// Furthermore, the new approach minimizes reallocs by using smart prediction. Furthermore, the caller shrinks
// the result memory via _expand() to avoid having too much extra/overhang. These optimizations seem to make
// the new approach better than the old one in every way, but especially performance.
{
#define REPLACEMENT_MODE2 aDest // For readability.
// THINGS TO SET NOW IN CASE OF EARLY RETURN OR GOTO:
// Set up the input/output lengths:
size_t haystack_length = aHaystackLength ? *aHaystackLength : strlen(aHaystack); // For performance, use caller's length if it was provided.
size_t length_temp; // Just a placeholder/memory location used by the alias below.
size_t &result_length = aHaystackLength ? *aHaystackLength : length_temp; // Make an alias for convenience and maintainability (if this is an output parameter for our caller, this step takes care that in advance).
// Set up the output buffer:
char *result_temp; // In mode #1, holds temporary memory that is freed before we return.
char *&result = aDest ? *aDest : result_temp; // Make an alias for convenience and maintainability (if aDest is non-NULL, it's an output parameter for our caller, and this step takes care that in advance).
result = NULL; // It's allocated only upon first use to avoid a potentially massive allocation that might
result_length = 0; // be wasted and cause swapping (not to mention that we'll have better ability to estimate the correct total size after the first replacement is discovered).
size_t result_size = 0;
// Variables used by both replacement methods.
char *src, *match_pos;
// END OF INITIAL SETUP.
// From now on, result_length and result should be kept up-to-date because they may have been set up
// as output parameters above.
if (!(*aHaystack && *aOld))
{
// Nothing to do if aHaystack is blank. If aOld is blank, that is not supported because it would be an
// infinite loop. This policy is now largely due to backward compatibility because some other policy
// may have been better.
result = aHaystack; // Return unaltered string to caller in its output paremeter (result is an alias for *aDest).
result_length = haystack_length; // This is an alias for an output parameter, so update it for caller.
return 0; // Report "no replacements".
}
size_t aOld_length = strlen(aOld);
size_t aNew_length = strlen(aNew);
int length_delta = (int)(aNew_length - aOld_length); // Cast to int to avoid loss of unsigned. A negative delta means the replacment substring is smaller than what it's replacing.
if (aSizeLimit != -1) // Caller provided a size *restriction*, so if necessary reduce aLimit to stay within bounds. Compare directly to -1 due to unsigned.
{
int extra_room = (int)(aSizeLimit-1 - haystack_length); // Cast to int to preserve negatives.
if (extra_room < 0) // Caller isn't supposed to call it this way. To avoid having to complicate the
aLimit = 0; // calculations in the else-if below, allow no replacements in this case.
else if (length_delta > 0) // New-str is bigger than old-str. This is checked to avoid going negative or dividing by 0 below. A positive delta means length of new/replacement string is greater than that of what it's replacing.
{
UINT upper_limit = (UINT)(extra_room / length_delta);
if (aLimit > upper_limit)
aLimit = upper_limit;
}
//else length_delta <= 0, so there no overflow should be possible. Leave aLimit as-is.
}
if (!REPLACEMENT_MODE2) // Mode #1
{
if (!length_delta // old_len==new_len, so use in-place method because it's just as fast in this case but it avoids the extra memory allocation.
|| haystack_length < 5000) // ...or the in-place method will likely be faster, and an earlier stage has ensured there's no risk of overflow.
goto in_place_method; // "Goto" to avoid annoying indentation and long IF-blocks.
//else continue on because the extra-memory method will usually perform better than the in-place method.
// The extra-memory method is much faster than the in-place method when many replacements are needed because
// it avoids a memmove() to shift the remainder of the buffer up against the area of memory that
// will be replaced (which for the in-place method is done for every replacement). The savings
// can be enormous if aSource is very large, assuming the system can allocate the memory without swapping.
}
// Otherwise:
// Since above didn't jump to the in place method, either the extra-memory method is preferred or this is mode #2.
// Never use the in-place method for mode #2 because caller always wants a separate memory area used (for its
// purposes, the extra-memory method is probably just as fast or faster than in-place method).
// Below uses a temp var. because realloc() returns NULL on failure but leaves original block allocated.
// Note that if it's given a NULL pointer, realloc() does a malloc() instead.
char *realloc_temp;
#define STRREPLACE_REALLOC(size) \
{\
result_size = size;\
if ( !(realloc_temp = (char *)realloc(result, result_size)) )\
goto out_of_mem;\
result = realloc_temp;\
}
// Other variables used by the replacement loop:
size_t haystack_portion_length, new_result_length;
UINT replacement_count;
// Perform the replacement:
for (replacement_count = 0, src = aHaystack
; aLimit && (match_pos = strstr2(src, aOld, aStringCaseSense));) // Relies on short-circuit boolean order.
{
++replacement_count;
--aLimit;
haystack_portion_length = match_pos - src; // The length of the haystack section between the end of the previous match and the start of the current one.
// Using the required length calculated below, expand/realloc "result" if necessary.
new_result_length = result_length + haystack_portion_length + aNew_length;
if (new_result_length >= result_size) // Uses >= to allow room for terminator.
STRREPLACE_REALLOC(PredictReplacementSize(length_delta, replacement_count, aLimit, (int)haystack_length
, (int)new_result_length, (int)(match_pos - aHaystack))); // This will return if an alloc error occurs.
// Now that we know "result" has enough capacity, put the new text into it. The first step
// is to copy over the part of haystack that appears before the match.
if (haystack_portion_length)
{
memcpy(result + result_length, src, haystack_portion_length);
result_length += haystack_portion_length;
}
// Now append the replacement string in place of the old string.
if (aNew_length)
{
memcpy(result + result_length, aNew, aNew_length);
result_length += aNew_length;
}
//else omit it altogether; i.e. replace every aOld with the empty string.
// Set up src to be the position where the next iteration will start searching. For consistency with
// the in-place method, overlapping matches are not detected. For example, the replacement
// of all occurrences of ".." with ". ." in "..." would produce ". ..", not ". . .":
src = match_pos + aOld_length; // This has two purposes: 1) Since match_pos is about to be altered by strstr, src serves as a placeholder for use by the next iteration; 2) it's also used further below.
}
if (!replacement_count) // No replacements were done, so optimize by keeping the original (avoids a malloc+memcpy).
{
// The following steps are appropriate for both mode #1 and #2 (for simplicity and maintainability,
// they're all done unconditionally even though mode #1 might not require them all).
result = aHaystack; // Return unaltered string to caller in its output paremeter (result is an alias for *aDest).