-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhtmltxar.cpp
680 lines (584 loc) · 19.1 KB
/
htmltxar.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
#ifdef RCSID
static char RCSid[] =
"$Header: d:/cvsroot/tads/html/htmltxar.cpp,v 1.2 1999/05/17 02:52:22 MJRoberts Exp $";
#endif
/*
* Copyright (c) 1997 by Michael J. Roberts. All Rights Reserved.
*
* Please see the accompanying license file, LICENSE.TXT, for information
* on using and copying this software.
*/
/*
Name
htmltxar.cpp - text array
Function
Notes
Modified
09/23/97 MJRoberts - Creation
*/
#include <stdlib.h>
#include <assert.h>
#ifndef HTMLTXAR_H
#include "htmltxar.h"
#endif
CHtmlTextArray::CHtmlTextArray()
{
/* allocate the initial page pointer array */
page_entries_ = 128;
pages_ = (CHtmlTextArrayEntry *)th_malloc(page_entries_ *
sizeof(CHtmlTextArrayEntry));
assert(pages_ != 0);
/* allocate the first page */
alloc_first_page();
/* nothing consumed yet */
max_addr_ = 0;
mem_in_use_ = 0;
}
CHtmlTextArray::~CHtmlTextArray()
{
size_t i;
CHtmlTextArrayEntry *p;
/* delete each page we've allocated */
for (i = 0, p = pages_ ; i < pages_alloced_ ; ++i, ++p)
{
/* free this page if it was allocated */
if (p->text_ != 0)
th_free(p->text_);
}
/* delete the page array itself */
th_free(pages_);
}
/*
* clear the text array - deletes all pages, but doesn't delete the page
* list itself
*/
void CHtmlTextArray::clear()
{
size_t i;
CHtmlTextArrayEntry *p;
/* delete each page we've allocated */
for (i = 0, p = pages_ ; i < pages_alloced_ ; ++i, ++p)
{
/* free this page if it was allocated */
if (p->text_ != 0)
{
/* free the page */
th_free(p->text_);
/* forget about it */
p->text_ = 0;
}
}
/* reset everything */
pages_alloced_ = 0;
pages_in_use_ = 0;
max_addr_ = 0;
mem_in_use_ = 0;
/* allocate the first page */
alloc_first_page();
}
/*
* Remove a reference to a block of text previously allocated. If this
* leaves the page containing the text unreferenced, we'll delete the
* entire page.
*/
void CHtmlTextArray::delete_text(unsigned long addr, size_t len)
{
size_t pg;
/* get the containing page */
pg = get_page(addr);
/* remove a reference to the containing page */
--(pages_[pg].refs_);
/* subtract the size from the space-in-use marker on the page */
pages_[pg].space_in_use_ -= len;
/* deduct this space from the total memory in use */
mem_in_use_ -= len;
/*
* If that leaves the page unreferenced, delete it. Never delete
* the active (i.e., last) page, even if it's otherwise
* unreferenced, since we could create a new reference in it at any
* time.
*/
if (pages_[pg].refs_ == 0 && pg != pages_alloced_ - 1)
{
/* deduct anything remaining on this page from the memory in use */
mem_in_use_ -= pages_[pg].space_in_use_;
/* delete the page's memory */
th_free(pages_[pg].text_);
/* forget the pointer, since it's gone now */
pages_[pg].text_ = 0;
/* that's one less page in use */
--pages_in_use_;
}
}
/*
* allocate the first page
*/
void CHtmlTextArray::alloc_first_page()
{
pages_[0].text_ = (textchar_t *)th_malloc(HTML_TEXTARRAY_PAGESIZE);
pages_[0].used_ = 0;
pages_[0].refs_ = 0;
pages_[0].space_in_use_ = 0;
pages_[0].alloced_ = 0;
pages_alloced_ = 1;
pages_in_use_ = 1;
assert(pages_[0].text_ != 0);
}
/*
* Add the text to the array, counting the reference
*/
unsigned long CHtmlTextArray::append_text(const textchar_t *txt, size_t len)
{
unsigned long addr;
size_t pg;
/* add the text normally */
addr = store_text(txt, len);
/* count the reference to the page containing the text */
pg = get_page(addr);
++(pages_[pg].refs_);
/* return the address */
return addr;
}
/*
* Temporarily store text without committing the space
*/
unsigned long CHtmlTextArray::store_text_temp(const textchar_t *txt,
size_t len)
{
unsigned long addr;
/*
* Make sure the text will fit in the page in a single chunk by
* reserving space for it
*/
addr = reserve_space(len);
/*
* Note the maximum address assigned, regardless of whether the
* space is committed yet or not
*/
max_addr_ = addr + len;
/* note the amount reserved on the page */
pages_[last_page()].alloced_ += len;
/* copy the text into the page at the current offset */
if (len != 0)
memcpy(pages_[last_page()].text_ + last_page_ofs(), txt, len);
/* return the address */
return addr;
}
/*
* Store text, committing the space
*/
unsigned long CHtmlTextArray::store_text(const textchar_t *txt, size_t len)
{
unsigned long addr;
size_t pg;
/* store the text */
addr = store_text_temp(txt, len);
/* count the space in use on the page */
pg = get_page(addr);
pages_[pg].space_in_use_ += len;
/* count this space in the total memory we're using */
mem_in_use_ += len;
/*
* commit the storage by moving the page's free space pointer past
* the text
*/
inc_last_page_ofs(len);
/* return the address */
return addr;
}
/*
* Reserve space for a chunk of text, ensuring that the chunk will be
* stored contiguously on a single page
*/
unsigned long CHtmlTextArray::reserve_space(size_t len)
{
/* it must fit on a page */
assert(len <= HTML_TEXTARRAY_PAGESIZE);
/*
* We're required to keep each text block contiguous, so make sure
* we have room for this entire block in the current page; if not,
* we need to move on to the next page.
*/
if (len > HTML_TEXTARRAY_PAGESIZE - last_page_ofs())
{
/* make sure we have room in the top-level page array */
if (pages_alloced_ == page_entries_)
{
/* allocate more entries */
page_entries_ += 128;
pages_ = (CHtmlTextArrayEntry *)th_realloc(pages_,
page_entries_ * sizeof(CHtmlTextArrayEntry));
assert(pages_ != 0);
}
/* allocate a new page */
pages_[pages_alloced_].text_ = (textchar_t *)
th_malloc(HTML_TEXTARRAY_PAGESIZE);
pages_[pages_alloced_].used_ = 0;
pages_[pages_alloced_].refs_ = 0;
pages_[pages_alloced_].alloced_ = 0;
pages_[pages_alloced_].space_in_use_ = 0;
++pages_alloced_;
++pages_in_use_;
}
/*
* Calculate the address. Treating the pages as though they
* constituted a single linear array by pretending each array were
* allocated directly after the previous one in memory, we'd simply
* use the page location times the page size plus the page offset.
* Since the pages are of a fixed size, we can later decode that
* formulation to find the actual location in memory.
*/
return make_addr(last_page(), last_page_ofs());
}
/*
* Increment an offset so that it points to another valid offset
*/
unsigned long CHtmlTextArray::inc_ofs(unsigned long addr) const
{
size_t pg;
size_t ofs;
/* get the page and page offset of this address */
pg = get_page(addr);
ofs = get_page_ofs(addr);
/*
* if the address is at the end of its page, we need to move to the
* next page, otherwise just go to the next offset within this page
*/
if (ofs >= pages_[pg].alloced_)
{
/*
* if we're at the end of the whole array, return the same
* address
*/
if (pg + 1 >= pages_alloced_)
return addr;
/* return the first address in the next page */
return make_addr(pg + 1, 0);
}
else
return addr + 1;
}
/*
* Decrement an offset so that it points to another valid offset
*/
unsigned long CHtmlTextArray::dec_ofs(unsigned long addr) const
{
size_t pg;
size_t ofs;
/* get the page and page offset of this address */
pg = get_page(addr);
ofs = get_page_ofs(addr);
/*
* if we're at the start of this page, we need to move to the
* previous page, otherwise just move down a character on this page
*/
if (ofs == 0)
{
/* if we're at the very beginning, return the same value */
if (addr == 0)
return addr;
/* return the last offset in the previous page */
return make_addr(pg - 1, pages_[pg - 1].used_ - 1);
}
else
return addr - 1;
}
/*
* Determine how many characters are between two text offsets. Since
* text offsets are not assigned continuously, it is possible that the
* difference of the two offsets overstates the number of characters
* between the two offset.
*/
unsigned long CHtmlTextArray::get_char_count(unsigned long startaddr,
unsigned long endaddr) const
{
size_t startpg, endpg, curpg;
size_t startofs, endofs, curofs;
unsigned long siz;
/* get the page and offset of the start and end addresses */
startpg = get_page(startaddr);
startofs = get_page_ofs(startaddr);
endpg = get_page(endaddr);
endofs = get_page_ofs(endaddr);
/* scan pages for the size */
for (siz = 0, curpg = startpg, curofs = startofs ;; )
{
/*
* if we're on the last page, get the distance from the ending
* offset to the current offset, and we're done
*/
if (curpg >= endpg || curpg >= pages_alloced_)
{
siz += endofs - curofs;
break;
}
/* add in the amount remaining on the current page */
siz += pages_[curpg].alloced_;
/* move on to the beginning of the next page */
++curpg;
curofs = 0;
}
/* return the size we calculated */
return siz;
}
/*
* Get a pointer to a chunk of characters starting at the given offset.
* Returns a pointer to the characters, and sets *len to the number of
* characters (up to a maximum of maxlen) in the chunk. This allows a
* caller to traverse the possibly discontinuous array of characters.
*/
const textchar_t *CHtmlTextArray::get_text_chunk(unsigned long *addr,
size_t *len_in_chunk, unsigned long maxlen) const
{
size_t pg;
size_t ofs;
/* get the page and offset of the given address */
pg = get_page(*addr);
ofs = get_page_ofs(*addr);
/* if we're past the end of this page, move on to the next */
if (ofs > pages_[pg].alloced_)
{
/* move to the start of the next page */
++pg;
ofs = 0;
}
/* for the size, use everything on the current page, up to the limit */
*len_in_chunk = pages_[pg].alloced_ - ofs;
if (*len_in_chunk > maxlen)
{
/* limit the size */
*len_in_chunk = maxlen;
/*
* we'll still have text left in this page after returning this
* chunk -- the next address will still be in this page
*/
*addr = make_addr(pg, ofs + maxlen);
}
else
{
/*
* we exhausted this page with this chunk -- the next address
* will be at the start of the next page
*/
*addr = make_addr(pg + 1, 0);
}
/* return the pointer to the page at the appropriate offset */
return pages_[pg].text_ + ofs;
}
/*
* is the given character a word character, for the purposes of a
* whole-word search?
*/
static int is_word_char(textchar_t c)
{
return is_alpha(c) || is_digit(c);
}
/*
* Search for text
*/
int CHtmlTextArray::search(const textchar_t *txt, size_t txtlen,
int exact_case, int whole_word,
int wrap, int dir,
unsigned long startofs,
unsigned long *match_start,
unsigned long *match_end)
{
const textchar_t *cur_p;
const textchar_t *cur_start;
size_t cur_rem;
size_t cur_pg;
size_t cur_ofs;
size_t start_pg;
size_t start_ofs;
/* get the page and offset of the starting address */
start_pg = cur_pg = get_page(startofs);
start_ofs = cur_ofs = get_page_ofs(startofs);
cur_rem = pages_[cur_pg].used_ - cur_ofs;
cur_p = cur_start = pages_[cur_pg].text_ + cur_ofs;
/*
* if this page has been deleted, set the remaining amount on the page
* to zero so that we know we have no text to check here
*/
if (pages_[cur_pg].text_ == 0)
cur_rem = cur_ofs = 0;
/* keep going until we run out of text */
for (;;)
{
size_t src_rem;
const textchar_t *src_p;
const textchar_t *arr_p;
size_t arr_rem;
size_t arr_pg;
size_t arr_ofs;
/* if we're going backwards, decrement first */
if (dir == -1)
{
/*
* Move back to the previous character on this page. If
* we're already at the start of the page, move to the
* previous page.
*/
if (cur_ofs == 0)
{
/* go back to the last character of the previous page */
if (cur_pg == 0)
{
if (wrap)
{
/* go back to the end of the buffer */
cur_pg = pages_alloced_;
}
else
{
/*
* we've reached the start of the buffer without
* finding the target, and wrapping is not
* allowed, so the search has failed
*/
return FALSE;
}
}
/* set up at the end of the previous page */
--cur_pg;
cur_p = pages_[cur_pg].text_ + pages_[cur_pg].used_;
cur_rem = 0;
cur_ofs = pages_[cur_pg].used_;
/* if the page has been deleted, there's no text to check */
if (pages_[cur_pg].text_ == 0)
cur_rem = cur_ofs = 0;
}
else
{
/* back up a character on the current page */
--cur_ofs;
--cur_p;
++cur_rem;
}
/* check to see if we're back where we started */
if (cur_pg == start_pg && cur_ofs == start_ofs)
return FALSE;
}
/* set up with the current array page */
arr_p = cur_p;
arr_rem = cur_rem;
arr_pg = cur_pg;
arr_ofs = cur_ofs;
/*
* We may need to do the comparison in chunks, because the
* current page may only hold a portion of the length of the
* target string. Keep scanning on subsequent pages as
* necessary.
*/
for (src_rem = txtlen, src_p = txt ;; )
{
size_t cur_comp_len;
/* determine how much we can compare on the current page */
cur_comp_len = src_rem;
if (cur_comp_len > arr_rem)
cur_comp_len = arr_rem;
/* see if this one matches - if not, stop looking */
if ((exact_case
? memcmp(src_p, arr_p, cur_comp_len)
: memicmp(src_p, arr_p, cur_comp_len)) != 0)
break;
/* advance our remainder pointers */
src_rem -= cur_comp_len;
src_p += cur_comp_len;
/* if we're out of source, we have a match */
if (src_rem == 0)
{
unsigned long a0 = make_addr(cur_pg, cur_ofs);
unsigned long a1 = make_addr(arr_pg, arr_ofs + cur_comp_len);
/*
* if this is a whole-word match, make sure we start and
* end on word boundaries
*/
if (whole_word
&& ((a0 != 0 && is_word_char(get_char(a0 - 1)))
|| (a1 < get_max_addr()
&& is_word_char(get_char(a1)))))
{
/*
* we have a word character on one side or the other,
* so it's not a match
*/
break;
}
/* return the match */
*match_start = a0;
*match_end = a1;
return TRUE;
}
/*
* Advance to the next page. If there isn't another page,
* we can stop searching, because we don't have enough text
* left to contain the target.
*/
++arr_pg;
if (arr_pg >= pages_alloced_)
break;
/* set up at the start of the next page */
arr_p = pages_[arr_pg].text_;
arr_rem = pages_[arr_pg].used_;
arr_ofs = 0;
/* if the page has been deleted, there's no text to check */
if (pages_[cur_pg].text_ == 0)
arr_rem = 0;
}
/* if we're going forwards, increment after an unsuccessful match */
if (dir == 1)
{
/* move ahead to the next character on this page, if possible */
if (cur_rem != 0)
{
++cur_ofs;
++cur_p;
--cur_rem;
/* check to see if we're back where we started */
if (cur_pg == start_pg && cur_ofs == start_ofs)
return FALSE;
}
/*
* If we've run out of text on the current page, move on to
* the next page.
*/
if (cur_rem == 0)
{
/* move to the start of the next page */
++cur_pg;
/* if there isn't another page, we can't go any further */
if (cur_pg >= pages_alloced_)
{
/*
* if they want us to wrap, go back to the start of
* the buffer
*/
if (wrap)
{
/* go back to the start of the buffer */
cur_pg = 0;
}
else
{
/*
* we've reached the end of the buffer without
* finding the target, and wrapping is not
* allowed, so the search has failed
*/
return FALSE;
}
}
/* set up at the start of this page */
cur_p = pages_[cur_pg].text_;
cur_rem = pages_[cur_pg].used_;
cur_ofs = 0;
/* if the page has been deleted, there's no text to check */
if (pages_[cur_pg].text_ == 0)
cur_rem = cur_ofs = 0;
/* check to see if we're back where we started */
if (cur_pg == start_pg && cur_ofs == start_ofs)
return FALSE;
}
}
}
}