Skip to content

Commit

Permalink
Merge pull request #105 from dag-erling/des/position
Browse files Browse the repository at this point in the history
Postpone position assignment
  • Loading branch information
dag-erling authored Jul 30, 2024
2 parents ca34130 + 2e5030a commit 9c098ea
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 103 deletions.
4 changes: 2 additions & 2 deletions lib/tre-ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
}

tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max)
{
tre_ast_node_t *node;
tre_literal_t *lit;
Expand All @@ -44,7 +44,7 @@ tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
lit = node->obj;
lit->code_min = code_min;
lit->code_max = code_max;
lit->position = position;
lit->position = -1;

return node;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/tre-ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);

tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max);

tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
Expand Down
106 changes: 50 additions & 56 deletions lib/tre-compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
return REG_ESPACE;
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
c->left = tre_ast_new_literal(mem, TAG, tag_id);
if (c->left == NULL)
return REG_ESPACE;
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
Expand Down Expand Up @@ -78,7 +78,7 @@ tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
return REG_ESPACE;
c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
c->right = tre_ast_new_literal(mem, TAG, tag_id);
if (c->right == NULL)
return REG_ESPACE;
c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
Expand Down Expand Up @@ -711,7 +711,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
tag_directions[max] = TRE_TAG_MAXIMIZE;
first_tag = 0;
}
*result = tre_ast_new_literal(mem, min, max, pos);
*result = tre_ast_new_literal(mem, min, max);
if (*result == NULL)
status = REG_ESPACE;

Expand Down Expand Up @@ -798,8 +798,7 @@ typedef enum {
iteration count to a catenated sequence of copies of the node. */
static reg_errcode_t
tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
int *position, tre_tag_direction_t *tag_directions,
int *max_depth)
tre_tag_direction_t *tag_directions, int *max_depth)
{
reg_errcode_t status = REG_OK;
int bottom = tre_stack_num_objects(stack);
Expand Down Expand Up @@ -949,7 +948,7 @@ tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
seq2 = copy;
if (seq2 == NULL)
return REG_ESPACE;
tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
tmp = tre_ast_new_literal(mem, EMPTY, -1);
if (tmp == NULL)
return REG_ESPACE;
seq2 = tre_ast_new_union(mem, tmp, seq2);
Expand Down Expand Up @@ -983,12 +982,12 @@ tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
int *old_params;

tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
tmp_l = tre_ast_new_literal(mem, PARAMETER, 0);
if (!tmp_l)
return REG_ESPACE;
((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
tmp_r = tre_ast_new_literal(mem, PARAMETER, 0);
if (!tmp_r)
return REG_ESPACE;
old_params = tre_mem_alloc(mem, sizeof(*old_params)
Expand Down Expand Up @@ -1028,19 +1027,9 @@ tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
}
}

*position += pos_add_total;

/* `max_pos' should never be larger than `*position' if the above
code works, but as an extra safeguard, make sure `*position' is
set large enough that enough memory will be allocated for the
transition table. */
if (max_pos > *position)
*position = max_pos;

#ifdef TRE_DEBUG
DPRINT(("Expanded AST:\n"));
tre_ast_print(ast);
DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
#endif

return status;
Expand Down Expand Up @@ -1305,33 +1294,36 @@ tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,


typedef enum {
NFL_RECURSE,
NFL_POST_UNION,
NFL_POST_CATENATION,
NFL_POST_ITERATION
} tre_nfl_stack_symbol_t;
NPFL_RECURSE,
NPFL_POST_UNION,
NPFL_POST_CATENATION,
NPFL_POST_ITERATION
} tre_npfl_stack_symbol_t;


/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
the nodes of the AST `tree'. */
/* Computes and fills in the fields `nullable', `position', `firstpos',
and `lastpos' for the nodes of the AST `tree'; `nextpos' points to an
integer indicating the next available position, and will be updated on
return to reflect the number of additional positions assigned. */
static reg_errcode_t
tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
tre_compute_npfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
int *nextpos)
{
int bottom = tre_stack_num_objects(stack);

STACK_PUSHR(stack, voidptr, tree);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, int, NPFL_RECURSE);

while (tre_stack_num_objects(stack) > bottom)
{
tre_nfl_stack_symbol_t symbol;
tre_npfl_stack_symbol_t symbol;
tre_ast_node_t *node;

symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
symbol = (tre_npfl_stack_symbol_t)tre_stack_pop_int(stack);
node = tre_stack_pop_voidptr(stack);
switch (symbol)
{
case NFL_RECURSE:
case NPFL_RECURSE:
switch (node->type)
{
case LITERAL:
Expand All @@ -1342,6 +1334,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Back references: nullable = false, firstpos = {i},
lastpos = {i}. */
node->nullable = 0;
lit->position = (*nextpos)++;
node->firstpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL, -1);
if (!node->firstpos)
Expand Down Expand Up @@ -1369,6 +1362,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Literal at position i: nullable = false, firstpos = {i},
lastpos = {i}. */
node->nullable = 0;
lit->position = (*nextpos)++;
node->firstpos =
tre_set_one(mem, lit->position, (int)lit->code_min,
(int)lit->code_max, 0, NULL, -1);
Expand All @@ -1389,36 +1383,36 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Compute the attributes for the two subtrees, and after that
for this node. */
STACK_PUSHR(stack, voidptr, node);
STACK_PUSHR(stack, int, NFL_POST_UNION);
STACK_PUSHR(stack, int, NPFL_POST_UNION);
STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, int, NPFL_RECURSE);
STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, int, NPFL_RECURSE);
break;

case CATENATION:
/* Compute the attributes for the two subtrees, and after that
for this node. */
STACK_PUSHR(stack, voidptr, node);
STACK_PUSHR(stack, int, NFL_POST_CATENATION);
STACK_PUSHR(stack, int, NPFL_POST_CATENATION);
STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, int, NPFL_RECURSE);
STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, int, NPFL_RECURSE);
break;

case ITERATION:
/* Compute the attributes for the subtree, and after that for
this node. */
STACK_PUSHR(stack, voidptr, node);
STACK_PUSHR(stack, int, NFL_POST_ITERATION);
STACK_PUSHR(stack, int, NPFL_POST_ITERATION);
STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
STACK_PUSHR(stack, int, NFL_RECURSE);
STACK_PUSHR(stack, int, NPFL_RECURSE);
break;
}
break; /* end case: NFL_RECURSE */
break; /* end case: NPFL_RECURSE */

case NFL_POST_UNION:
case NPFL_POST_UNION:
{
tre_union_t *uni = (tre_union_t *)node->obj;
node->nullable = uni->left->nullable || uni->right->nullable;
Expand All @@ -1433,7 +1427,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
break;
}

case NFL_POST_ITERATION:
case NPFL_POST_ITERATION:
{
tre_iteration_t *iter = (tre_iteration_t *)node->obj;

Expand All @@ -1446,7 +1440,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
break;
}

case NFL_POST_CATENATION:
case NPFL_POST_CATENATION:
{
int num_tags, *tags, assertions, params_seen;
int *params;
Expand Down Expand Up @@ -1839,7 +1833,6 @@ tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
return errcode;
}


#define ERROR_EXIT(err) \
do \
{ \
Expand All @@ -1864,6 +1857,7 @@ tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
tre_tag_direction_t *tag_directions = NULL;
reg_errcode_t errcode;
tre_mem_t mem;
int numpos = 0;

/* Parse context. */
tre_parse_ctx_t parse_ctx;
Expand Down Expand Up @@ -1970,8 +1964,8 @@ tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
}

/* Expand iteration nodes. */
errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
tag_directions, &tnfa->params_depth);
errcode = tre_expand_ast(mem, stack, tree, tag_directions,
&tnfa->params_depth);
if (errcode != REG_OK)
ERROR_EXIT(errcode);

Expand All @@ -1980,37 +1974,37 @@ tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
for example "a*" or "ab*". Figure out a simple way to detect
this possibility. */
tmp_ast_l = tree;
tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
tmp_ast_r = tre_ast_new_literal(mem, 0, 0);
if (tmp_ast_r == NULL)
ERROR_EXIT(REG_ESPACE);

tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
if (tree == NULL)
ERROR_EXIT(REG_ESPACE);

errcode = tre_compute_npfl(mem, stack, tree, &numpos);
if (errcode != REG_OK)
ERROR_EXIT(errcode);

#ifdef TRE_DEBUG
tre_ast_print(tree);
DPRINT(("Number of states: %d\n", parse_ctx.position));
DPRINT(("Number of states: %d\n", numpos));
#endif /* TRE_DEBUG */

errcode = tre_compute_nfl(mem, stack, tree);
if (errcode != REG_OK)
ERROR_EXIT(errcode);

counts = xmalloc(sizeof(int) * parse_ctx.position);
counts = xmalloc(sizeof(int) * numpos);
if (counts == NULL)
ERROR_EXIT(REG_ESPACE);

offs = xmalloc(sizeof(int) * parse_ctx.position);
offs = xmalloc(sizeof(int) * numpos);
if (offs == NULL)
ERROR_EXIT(REG_ESPACE);

for (i = 0; i < parse_ctx.position; i++)
for (i = 0; i < numpos; i++)
counts[i] = 0;
tre_ast_to_tnfa(tree, NULL, counts, NULL);

add = 0;
for (i = 0; i < parse_ctx.position; i++)
for (i = 0; i < numpos; i++)
{
offs[i] = add;
add += counts[i] + 1;
Expand Down Expand Up @@ -2148,7 +2142,7 @@ tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)

tnfa->num_transitions = add;
tnfa->final = transitions + offs[tree->lastpos[0].position];
tnfa->num_states = parse_ctx.position;
tnfa->num_states = numpos;
tnfa->cflags = cflags;

DPRINT(("final state %p\n", (void *)tnfa->final));
Expand Down
Loading

0 comments on commit 9c098ea

Please sign in to comment.