Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for issue #18. #20

Merged
merged 1 commit into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions src/main/c/JavaPcre.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ typedef struct CompileData_TAG {
int erroroffset;
} CompileData;

/* Named capture group information for a compiled pattern, as produced by
   pcre2_get_info_group() and released with free_group_data().
   The struct is passed by value; keep the field order stable because the
   Java-side JNA mapping declares the same order (names, namesnum, namescount). */
typedef struct GroupData_TAG {
char** names; /* array of zero-terminated group names, one heap allocation per name */
int* namesnum; /* namesnum[i] is the capture group number that belongs to names[i] */
int namescount; /* number of valid entries in names and namesnum; 0 when the pattern has no named groups */
} GroupData;

// regex struct/array implementation starts here
typedef struct RegexStruct_TAG {
int numVals;
Expand Down Expand Up @@ -380,6 +386,93 @@ int pcre2_check_utf8(char temp){
}
}

// Provides method to access capture group info after compile but before matching. PCRE2_INFO_NAMETABLE is available via pcre2_pattern_info.
/*
 * Collects the named capture groups of a compiled pattern.
 *
 * re: compiled pattern to inspect.
 *
 * Returns a GroupData by value:
 *   - namescount is the number of named groups,
 *   - names[i] is a freshly allocated, zero-terminated copy of the i-th name,
 *   - namesnum[i] is the capture group number paired with names[i].
 * When the pattern has no named groups, or the name information cannot be
 * queried (pcre2_pattern_info() reports an error), namescount is 0 and
 * names/namesnum point to one-element zeroed placeholder arrays.
 *
 * The caller owns the returned memory and releases it with free_group_data().
 * On allocation failure the function prints an error and terminates the
 * process, like the other allocation paths in this function always have.
 *
 * NOTE(review): the two-byte group number decoding below follows the name
 * table layout of the 8-bit PCRE2 library -- confirm PCRE2_CODE_UNIT_WIDTH is 8.
 */
GroupData pcre2_get_info_group(pcre2_code *re){
    PCRE2_SPTR name_table = NULL;
    PCRE2_SPTR tabptr;
    uint32_t namecount = 0;
    uint32_t name_entry_size = 0;
    uint32_t i; /* unsigned to match namecount and avoid a signed/unsigned comparison */
    GroupData temp;

    /* pcre2_pattern_info() returns 0 on success. On failure the output
       variable is not written, so fall back to "no named groups" instead of
       reading an indeterminate value. */
    if (pcre2_pattern_info(
            re,                   /* the compiled pattern */
            PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
            &namecount) != 0) {   /* where to put the answer */
        namecount = 0;
    }

    /* Before we can access the substrings, we must extract the table for
       translating names to numbers, and the size of each entry in the table. */
    if (namecount > 0) {
        if (pcre2_pattern_info(
                re,                       /* the compiled pattern */
                PCRE2_INFO_NAMETABLE,     /* address of the table */
                &name_table) != 0         /* where to put the answer */
            || pcre2_pattern_info(
                re,                       /* the compiled pattern */
                PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
                &name_entry_size) != 0    /* where to put the answer */
            || name_table == NULL
            || name_entry_size < 3) {     /* need 2 number bytes + terminating zero */
            namecount = 0;
        }
    }

    if (namecount == 0) {
        /* Keep handing out non-NULL placeholder arrays so existing callers
           that expect valid pointers continue to work. */
        temp.namescount = 0;
        temp.names = (char**)calloc(1, sizeof(char*));
        temp.namesnum = (int*)calloc(1, sizeof(int));
        if (temp.names == NULL || temp.namesnum == NULL) {
            printf("Error: Out of memory\r\n");
            exit(-1);
        }
        return temp;
    }

    temp.namescount = (int)namecount;
    temp.namesnum = (int*)calloc(namecount, sizeof(int));
    temp.names = (char**)calloc(namecount, sizeof(char*));
    if (temp.names == NULL || temp.namesnum == NULL) {
        printf("Error: Out of memory\r\n");
        exit(-1);
    }

    tabptr = name_table;
    for (i = 0; i < namecount; i++) {
        /* name table is stored in this format:
             00 01 d a t e 00 ??
             00 05 d a y 00 ?? ??
             etc.
           The first two bytes hold the number of the capturing parenthesis
           (most significant byte first), the name follows and is zero
           terminated, and ?? is an undefined padding byte. */
        size_t buffer_size = (size_t)name_entry_size - 2;

        temp.namesnum[i] = (tabptr[0] << 8) | tabptr[1];

        /* calloc zero-fills the buffer; only buffer_size - 1 bytes are copied,
           so the last byte always remains a terminating zero. */
        temp.names[i] = (char*)calloc(buffer_size, sizeof(char));
        if (temp.names[i] == NULL) {
            printf("Error: Out of memory\r\n");
            exit(-1);
        }
        memcpy(temp.names[i], (const char *)(tabptr + 2), buffer_size - 1);

        tabptr += name_entry_size;
    }
    return temp;
}

/*
 * Releases all memory owned by a GroupData returned from pcre2_get_info_group().
 *
 * sVal: the group data to release; its pointers must not be used afterwards.
 *
 * The names and namesnum arrays are freed even when namescount is 0, because
 * pcre2_get_info_group() allocates one-element placeholder arrays in that
 * case; freeing them only for namescount > 0 leaked both allocations for
 * every pattern without named groups. free(NULL) is a no-op, so NULL
 * pointers are handled as well.
 */
void free_group_data(GroupData sVal) {
    int loop;

    if (sVal.names != NULL) {
        /* each name is its own allocation and must go before the outer array */
        for (loop = 0; loop < sVal.namescount; loop++) {
            free(sVal.names[loop]);
        }
    }
    free(sVal.names);
    free(sVal.namesnum);
}


// this function contains matching for a single match
RegexStruct pcre2_single_jmatch(char *b, pcre2_code *re, int offset, MatchOptionsStruct *temp, pcre2_match_context *mcontext){
pcre2_match_data *match_data;
Expand Down
46 changes: 38 additions & 8 deletions src/main/java/com/teragrep/jpr_01/JavaPcre.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,21 @@ public static class ByValue extends RegexStruct implements Structure.ByValue {}
public int rc;
}

/**
 * JNA mapping of the native {@code GroupData} struct that carries the named
 * capture group information of a compiled pattern. The field order declared
 * in {@code @FieldOrder} has to stay in sync with the native struct layout.
 */
@FieldOrder({ "names", "namesnum", "namescount" })
class GroupData extends Structure {
/** By-value variant, used where the native functions take or return the struct by value. */
public static class ByValue extends GroupData implements Structure.ByValue {}
public Pointer names; // char**: native array of zero-terminated group names
public Pointer namesnum; // int*: native array of group numbers, same index as names
public int namescount; // number of entries in names and namesnum
}

void RegexStruct_cleanup(RegexStruct.ByValue sVal);

CompileData.ByValue pcre2_jcompile(String pattern, int i, OptionsStruct options, Pointer ccontext); // returns struct containing compiled pattern re

/**
 * Reads the named capture group table of a compiled pattern.
 *
 * @param re pointer to the compiled pattern
 * @return natively allocated group data; release it with {@link #free_group_data}
 */
GroupData.ByValue pcre2_get_info_group(Pointer re); // returns struct containing group info
/**
 * Frees the native memory referenced by a group data struct.
 *
 * @param sVal struct previously returned by {@link #pcre2_get_info_group}
 */
void free_group_data(GroupData.ByValue sVal); // releases the memory allocated to the group info struct.

RegexStruct.ByValue pcre2_single_jmatch(String subject, Pointer re, int offset, MatchOptionsStruct match_options, Pointer mcontext); // returns pointer to a single match data.
ErrorStruct.ByValue pcre2_translate_error_code(int errorcode);

Expand Down Expand Up @@ -183,9 +194,27 @@ public JavaPcre(){
mcontext = null; // default value for when context is not used in match
matchfound = false;
JPCRE2_ERROR_NOMATCH = false;
name_table = new HashMap<>();
}
// Make another constructor if/when memory management is implemented to the context functions.

/**
 * Replaces the contents of {@code name_table} with the name-to-number
 * pairs found in the given native group data.
 *
 * <p>The table is always emptied first; it stays empty when the group data
 * reports no named groups.
 *
 * @param groupData group information read from the compiled pattern
 */
public void set_name_table(LibJavaPcre.GroupData.ByValue groupData) {
    // Drop whatever an earlier pattern left behind before reading new data.
    name_table.clear();

    final int groupCount = groupData.namescount;
    if (groupCount <= 0) {
        return;
    }

    // Both native arrays share the same index: groupNames[i] <-> groupNumbers[i].
    final String[] groupNames = groupData.names.getStringArray(0, groupCount);
    final int[] groupNumbers = groupData.namesnum.getIntArray(0, groupCount);
    int idx = 0;
    while (idx < groupCount) {
        name_table.put(groupNames[idx], groupNumbers[idx]);
        idx++;
    }
}

/**
 * Returns the table that maps capture group names to group numbers.
 *
 * <p>The returned map is the instance held by this object, not a copy, so
 * later changes to the table are visible through it.
 *
 * @return the name table of this instance
 */
public Map<String, Integer> get_name_table() {
    return this.name_table;
}
Expand Down Expand Up @@ -340,6 +369,12 @@ public void compile_java(String pat){
LibJavaPcre.INSTANCE.errorcleanup(p);
throw new PatternSyntaxException(val, pattern, comp_val.erroroffset);
}
else{
// initialize the groupData and translate it to name_table format.
LibJavaPcre.GroupData.ByValue groupData = LibJavaPcre.INSTANCE.pcre2_get_info_group(re);
set_name_table(groupData);
LibJavaPcre.INSTANCE.free_group_data(groupData);
}
}

public boolean checkmatchoptionzero(){
Expand All @@ -354,7 +389,6 @@ public void singlematch_java(String a, int b){
if (a == null) {
throw new IllegalStateException("Subject is null");
}
name_table = new LinkedHashMap<>();
subject = a;
offset = b;
match_table = new LinkedHashMap<>();
Expand Down Expand Up @@ -395,13 +429,6 @@ public void singlematch_java(String a, int b){
matchfound = true;
final String[] regex_vals = regex_val.vals.getStringArray(0, regex_val.numVals);
final int[] regex_ovector = regex_val.ovector.getIntArray(0, (regex_val.numVals + 2));
if (regex_val.namescount > 0) {
final String[] regex_names = regex_val.names.getStringArray(0, regex_val.namescount);
final int[] namesnum = regex_val.namesnum.getIntArray(0, regex_val.namescount);
for (int namesloop = 0; namesloop < regex_val.namescount; namesloop++) {
name_table.put(regex_names[namesloop], namesnum[namesloop]);
}
}
for (int regexloop = 0; regexloop < regex_val.numVals; regexloop++) {
match_table.put(ind++, regex_vals[regexloop]);
}
Expand All @@ -421,6 +448,9 @@ public void jcompile_free(){
if (re != null){
LibJavaPcre.INSTANCE.pcre2_jcompile_free(re);
re = null;
if (!name_table.isEmpty()) {
name_table.clear(); // clear name_table that is constructed using the compiled pattern.
}
}else{
throw new IllegalStateException("No data to free");
}
Expand Down
17 changes: 17 additions & 0 deletions src/test/java/com/teragrep/jpr_01/JavaPcreIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Assertions;

import java.util.HashMap;
import java.util.Map;

import org.slf4j.Logger;
Expand Down Expand Up @@ -164,6 +165,22 @@ void pcre2_compile_test() {
Assertions.assertEquals(null, s1.get_re());
}

/**
 * Compiling a pattern with named groups must fill the name table right away,
 * and freeing the compiled pattern must empty it again.
 */
@Test
void pcre2_compile_test_with_group_data() {
    // expected name -> group number pairs for the pattern below
    final Map<String, Integer> expected_nameTable = new HashMap<>();
    expected_nameTable.put("nimi", 1);
    expected_nameTable.put("sposti", 2);

    final JavaPcre s1 = new JavaPcre();
    s1.compile_java("From:(?<nimi>[^@]+)@(?<sposti>[^\r]+)");
    Assertions.assertNotNull(s1.get_re());

    // the table is available after compile, without running a match
    final Map<String, Integer> nameTable = s1.get_name_table();
    Assertions.assertEquals(2, nameTable.size());
    Assertions.assertEquals(expected_nameTable, nameTable);

    // nameTable is the instance's own map, so it reflects the cleanup
    s1.jcompile_free();
    Assertions.assertNull(s1.get_re());
    Assertions.assertTrue(nameTable.isEmpty());
}

@Test
void pcre2_translator_test() {
// broken
Expand Down
Loading