laurikari · dag-erling · Sep 4, 2024 · Jul 25, 2024 · Jul 30, 2024 · Sep 4, 2024
diff --git a/lib/tre-parse.c b/lib/tre-parse.c
@@ -259,166 +259,164 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
 			int *items_size)
 {
   const tre_char_t *re = ctx->re;
-  reg_errcode_t status = REG_OK;
+  reg_errcode_t status;
   tre_ctype_t class = (tre_ctype_t)0;
+  tre_cint_t min = 0, max = 0;
   int i = *num_items;
   int max_i = *items_size;
   int skip;
 
   /* Build an array of the items in the bracket expression. */
-  while (status == REG_OK)
+  for (;;)
     {
       skip = 0;
       if (re == ctx->re_end)
 	{
-	  status = REG_EBRACK;
+	  return REG_EBRACK;
 	}
-      else if (*re == CHAR_RBRACKET && re > ctx->re)
+      if (*re == CHAR_RBRACKET && re > ctx->re)
 	{
 	  DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n", REST(re)));
 	  re++;
 	  break;
 	}
-      else
+      class = (tre_ctype_t)0;
+      if (re + 2 < ctx->re_end
+	  && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
 	{
-	  tre_cint_t min = 0, max = 0;
-
-	  class = (tre_ctype_t)0;
-	  if (re + 2 < ctx->re_end
-	      && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
-	    {
-	      DPRINT(("tre_parse_bracket:  range: '%.*" STRF "'\n", REST(re)));
-	      min = *re;
-	      max = *(re + 2);
-	      re += 3;
-	      /* XXX - Should use collation order instead of encoding values
-		 in character ranges. */
-	      if (min > max)
-		status = REG_ERANGE;
-	    }
-	  else if (re + 1 < ctx->re_end
-		   && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
-	    status = REG_ECOLLATE;
-	  else if (re + 1 < ctx->re_end
-		   && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
-	    status = REG_ECOLLATE;
-	  else if (re + 1 < ctx->re_end
-		   && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
+	  DPRINT(("tre_parse_bracket:  range: '%.*" STRF "'\n", REST(re)));
+	  min = *re;
+	  max = *(re + 2);
+	  re += 3;
+	  /* XXX - Should use collation order instead of encoding values
+	     in character ranges. */
+	  if (min > max)
+	    return REG_ERANGE;
+	}
+      else if (re + 1 < ctx->re_end
+	       && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
+	return REG_ECOLLATE;
+      else if (re + 1 < ctx->re_end
+	       && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
+	return REG_ECOLLATE;
+      else if (re + 1 < ctx->re_end
+	       && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
+	{
+	  char tmp_str[64];
+	  const tre_char_t *endptr = re + 2;
+	  size_t len;
+	  DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re)));
+	  while (endptr < ctx->re_end && *endptr != CHAR_COLON)
+	    endptr++;
+	  if (endptr != ctx->re_end)
 	    {
-	      char tmp_str[64];
-	      const tre_char_t *endptr = re + 2;
-	      size_t len;
-	      DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re)));
-	      while (endptr < ctx->re_end && *endptr != CHAR_COLON)
-		endptr++;
-	      if (endptr != ctx->re_end)
-		{
-		  len = MIN(endptr - re - 2, 63);
+	      len = MIN(endptr - re - 2, 63);
 #ifdef TRE_WCHAR
-		  {
-		    tre_char_t tmp_wcs[64];
-		    wcsncpy(tmp_wcs, re + 2, len);
-		    tmp_wcs[len] = L'\0';
+	      {
+		tre_char_t tmp_wcs[64];
+		wcsncpy(tmp_wcs, re + 2, len);
+		tmp_wcs[len] = L'\0';
 #if defined HAVE_WCSRTOMBS
-		    {
-		      mbstate_t state;
-		      const tre_char_t *src = tmp_wcs;
-		      memset(&state, '\0', sizeof(state));
-		      len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state);
-		    }
+		{
+		  mbstate_t state;
+		  const tre_char_t *src = tmp_wcs;
+		  memset(&state, '\0', sizeof(state));
+		  len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state);
+		}
 #elif defined HAVE_WCSTOMBS
-		    len = wcstombs(tmp_str, tmp_wcs, 63);
+		len = wcstombs(tmp_str, tmp_wcs, 63);
 #endif /* defined HAVE_WCSTOMBS */
-		  }
+		if (len == (size_t)-1)
+		  return REG_ECTYPE;
+	      }
 #else /* !TRE_WCHAR */
-		  strncpy(tmp_str, (const char*)re + 2, len);
+	      strncpy(tmp_str, (const char*)re + 2, len);
 #endif /* !TRE_WCHAR */
-		  tmp_str[len] = '\0';
-		  DPRINT(("  class name: %s\n", tmp_str));
-		  class = tre_ctype(tmp_str);
-		  if (!class)
-		    status = REG_ECTYPE;
-		  /* Optimize character classes for 8 bit character sets. */
-		  if (status == REG_OK && ctx->mb_cur_max == 1)
-		    {
-		      status = tre_expand_ctype(ctx->mem, class, items,
-						&i, &max_i, ctx->cflags);
-		      class = (tre_ctype_t)0;
-		      skip = 1;
-		    }
-		  re = endptr + 2;
+	      tmp_str[len] = '\0';
+	      DPRINT(("  class name: %s\n", tmp_str));
+	      class = tre_ctype(tmp_str);
+	      if (!class)
+		return REG_ECTYPE;
+	      /* Optimize character classes for 8 bit character sets. */
+	      if (ctx->mb_cur_max == 1)
+		{
+		  status = tre_expand_ctype(ctx->mem, class, items,
+					    &i, &max_i, ctx->cflags);
+		  if (status != REG_OK)
+		    return status;
+		  class = (tre_ctype_t)0;
+		  skip = 1;
 		}
-	      else
-		status = REG_ECTYPE;
-	      min = 0;
-	      max = TRE_CHAR_MAX;
+	      re = endptr + 2;
 	    }
 	  else
-	    {
-	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
-	      if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
-		  && ctx->re != re)
-		/* Two ranges are not allowed to share and endpoint. */
-		status = REG_ERANGE;
-	      min = max = *re++;
-	    }
+	    return REG_ECTYPE;
+	  min = 0;
+	  max = TRE_CHAR_MAX;
+	}
+      else
+	{
+	  DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	  if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
+	      && ctx->re != re)
+	    /* Two ranges are not allowed to share and endpoint. */
+	    return REG_ERANGE;
+	  min = max = *re++;
+	}
 
+      if (class && negate)
+	if (*num_neg_classes >= MAX_NEG_CLASSES)
+	  return REG_ESPACE;
+	else
+	  neg_classes[(*num_neg_classes)++] = class;
+      else if (!skip)
+	{
+	  status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
 	  if (status != REG_OK)
-	    break;
+	    return status;
+	  ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
+	}
 
-	  if (class && negate)
-	    if (*num_neg_classes >= MAX_NEG_CLASSES)
-	      status = REG_ESPACE;
-	    else
-	      neg_classes[(*num_neg_classes)++] = class;
-	  else if (!skip)
-	    {
-	      status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
-	      if (status != REG_OK)
-		break;
-	      ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
-	    }
+      /* Add opposite-case counterpoints if REG_ICASE is present.
+	 This is broken if there are more than two "same" characters. */
+      if (ctx->cflags & REG_ICASE && !class && !skip)
+	{
+	  tre_cint_t cmin, ccurr;
 
-	  /* Add opposite-case counterpoints if REG_ICASE is present.
-	     This is broken if there are more than two "same" characters. */
-	  if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
+	  DPRINT(("adding opposite-case counterpoints\n"));
+	  while (min <= max)
 	    {
-	      tre_cint_t cmin, ccurr;
-
-	      DPRINT(("adding opposite-case counterpoints\n"));
-	      while (min <= max)
+	      if (tre_islower(min))
 		{
-		  if (tre_islower(min))
-		    {
-		      cmin = ccurr = tre_toupper(min++);
-		      while (tre_islower(min) && tre_toupper(min) == ccurr + 1
-			     && min <= max)
-			ccurr = tre_toupper(min++);
-		      status = tre_new_item(ctx->mem, cmin, ccurr,
-					    &i, &max_i, items);
-		    }
-		  else if (tre_isupper(min))
-		    {
-		      cmin = ccurr = tre_tolower(min++);
-		      while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
-			     && min <= max)
-			ccurr = tre_tolower(min++);
-		      status = tre_new_item(ctx->mem, cmin, ccurr,
-					    &i, &max_i, items);
-		    }
-		  else min++;
+		  cmin = ccurr = tre_toupper(min++);
+		  while (tre_islower(min) && tre_toupper(min) == ccurr + 1
+			 && min <= max)
+		    ccurr = tre_toupper(min++);
+		  status = tre_new_item(ctx->mem, cmin, ccurr,
+					&i, &max_i, items);
 		  if (status != REG_OK)
-		    break;
+		    return status;
 		}
-	      if (status != REG_OK)
-		break;
+	      else if (tre_isupper(min))
+		{
+		  cmin = ccurr = tre_tolower(min++);
+		  while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
+			 && min <= max)
+		    ccurr = tre_tolower(min++);
+		  status = tre_new_item(ctx->mem, cmin, ccurr,
+					&i, &max_i, items);
+		  if (status != REG_OK)
+		    return status;
+		}
+	      else
+		min++;
 	    }
 	}
     }
   *num_items = i;
   *items_size = max_i;
   ctx->re = re;
-  return status;
+  return REG_OK;
 }
 
 static reg_errcode_t
@@ -1494,7 +1492,7 @@ tre_parse(tre_parse_ctx_t *ctx)
 		      /* Wide char. */
 		      char tmp[9]; /* max 8 hex digits + terminator */
 		      long val;
-		      int i = 0;
+		      size_t i = 0;
 		      ctx->re++;
 		      while (ctx->re_end - ctx->re >= 0)
 			{

diff --git a/tests/retest.c b/tests/retest.c
@@ -1105,6 +1105,7 @@ main(int argc, char **argv)
   test_comp("[[:xdigit:]]+", REG_EXTENDED, 0);
   test_exec("-0123456789ABCDEFabcdef", 0, REG_OK, 1, 23, END);
   test_comp("[[:bogus-character-class-name:]", REG_EXTENDED, REG_ECTYPE);
+  test_comp("[[:\xff:", REG_EXTENDED, REG_ECTYPE);
 
 
   /* Range expressions (assuming that the C locale is being used). */