Adds pcre2_get_info_group/free_group_data to the C wrapper and calls them from
JavaPcre.compile_java, so name_table is filled once at compile time instead of
inside singlematch_java.

Review notes: the patch is laid out as one diff line per source line; the
indentation of context and removed lines is a best-effort reconstruction. The
post-image blob ids on the "index" lines predate the review changes made to the
added lines.

diff --git a/src/main/c/JavaPcre.c b/src/main/c/JavaPcre.c
index 981f00e..6348cff 100644
--- a/src/main/c/JavaPcre.c
+++ b/src/main/c/JavaPcre.c
@@ -99,6 +99,12 @@ typedef struct CompileData_TAG {
     int erroroffset;
 } CompileData;
 
+typedef struct GroupData_TAG {
+    char** names;
+    int* namesnum;
+    int namescount;
+} GroupData;
+
 // regex struct/array implementation starts here
 typedef struct RegexStruct_TAG {
     int numVals;
@@ -380,6 +386,74 @@ int pcre2_check_utf8(char temp){
     }
 }
 
+// Collects the named capture groups of a compiled pattern so they are available
+// after compile but before matching. PCRE2_INFO_NAMETABLE is available via
+// pcre2_pattern_info. names and namesnum are heap-allocated in every case (even
+// for zero names); the caller hands the struct to free_group_data when done.
+GroupData pcre2_get_info_group(pcre2_code *re){
+    PCRE2_SPTR name_table = NULL;
+    uint32_t namecount = 0;
+    uint32_t name_entry_size = 0;
+    GroupData temp;
+    uint32_t i;
+    size_t slots;
+
+    // A failed query (for example re == NULL) yields no usable value, so it is
+    // treated as "no named groups" rather than reading an unset variable.
+    if (pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &namecount) != 0) {
+        namecount = 0;
+    }
+    if (namecount > 0 &&
+        (pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &name_table) != 0 ||
+         pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size) != 0 ||
+         name_table == NULL || name_entry_size < 3)) {
+        namecount = 0;
+    }
+
+    // Keep at least one slot so names/namesnum are never NULL for the caller.
+    slots = (namecount > 0) ? namecount : 1;
+    temp.namescount = (int)namecount;
+    temp.names = (char**)calloc(slots, sizeof(char*));
+    temp.namesnum = (int*)calloc(slots, sizeof(int));
+    if (temp.names == NULL || temp.namesnum == NULL) {
+        printf("Error: Out of memory\r\n");
+        exit(-1);
+    }
+
+    // Each name table entry is laid out like this:
+    //   00 01 d a t e 00 ??
+    //   00 05 d a y 00 ?? ??
+    // The first two bytes are the number of the capturing parenthesis (high byte
+    // first), then the zero-terminated name; ?? is an undefined byte.
+    for (i = 0; i < namecount; i++) {
+        PCRE2_SPTR entry = name_table + (size_t)i * name_entry_size;
+
+        temp.namesnum[i] = (entry[0] << 8) | entry[1];
+        // Zero-filled and one byte longer than the copy, so the name stays terminated.
+        temp.names[i] = (char*)calloc(name_entry_size - 2, sizeof(char));
+        if (temp.names[i] == NULL) {
+            printf("Error: Out of memory\r\n");
+            exit(-1);
+        }
+        memcpy(temp.names[i], entry + 2, name_entry_size - 3);
+    }
+    return temp;
+}
+
+// Releases every allocation made by pcre2_get_info_group for sVal.
+void free_group_data(GroupData sVal) {
+    int loop;
+
+    for (loop = 0; loop < sVal.namescount; loop++) {
+        free(sVal.names[loop]);
+    }
+    // The two arrays are allocated even when namescount is 0, so they are freed
+    // unconditionally (free(NULL) is a no-op).
+    free(sVal.names);
+    free(sVal.namesnum);
+}
+
+
 // this function contains matching for a single match
 RegexStruct pcre2_single_jmatch(char *b, pcre2_code *re, int offset, MatchOptionsStruct *temp, pcre2_match_context *mcontext){
     pcre2_match_data *match_data;
diff --git a/src/main/java/com/teragrep/jpr_01/JavaPcre.java b/src/main/java/com/teragrep/jpr_01/JavaPcre.java
index 7ca7d1b..67f0418 100644
--- a/src/main/java/com/teragrep/jpr_01/JavaPcre.java
+++ b/src/main/java/com/teragrep/jpr_01/JavaPcre.java
@@ -124,10 +124,21 @@ public static class ByValue extends RegexStruct implements Structure.ByValue {}
             public int rc;
         }
 
+        @FieldOrder({ "names", "namesnum", "namescount" })
+        class GroupData extends Structure {
+            public static class ByValue extends GroupData implements Structure.ByValue {}
+            public Pointer names; // char**
+            public Pointer namesnum; // int*
+            public int namescount;
+        }
+
         void RegexStruct_cleanup(RegexStruct.ByValue sVal);
 
         CompileData.ByValue pcre2_jcompile(String pattern, int i, OptionsStruct options, Pointer ccontext); // returns struct containing compiled pattern re
 
+        GroupData.ByValue pcre2_get_info_group(Pointer re); // returns struct containing group info
+        void free_group_data(GroupData.ByValue sVal); // releases the memory allocated to the group info struct.
+
         RegexStruct.ByValue pcre2_single_jmatch(String subject, Pointer re, int offset, MatchOptionsStruct match_options, Pointer mcontext); // returns pointer to a single match data.
 
         ErrorStruct.ByValue pcre2_translate_error_code(int errorcode);
@@ -183,9 +194,26 @@ public JavaPcre(){
         mcontext = null; // default value for when context is not used in match
         matchfound = false;
         JPCRE2_ERROR_NOMATCH = false;
+        name_table = new HashMap<>();
     }
     // Make another constructor if/when memory management is implemented to the context functions.
 
+    /**
+     * Rebuilds name_table (group name to group number) from the native group data.
+     * The table is emptied first, so it stays empty when namescount is 0.
+     */
+    public void set_name_table(LibJavaPcre.GroupData.ByValue groupData) {
+        name_table.clear();
+        if (groupData.namescount <= 0) {
+            return;
+        }
+        final String[] regex_names = groupData.names.getStringArray(0, groupData.namescount);
+        final int[] namesnum = groupData.namesnum.getIntArray(0, groupData.namescount);
+        for (int namesloop = 0; namesloop < groupData.namescount; namesloop++) {
+            name_table.put(regex_names[namesloop], namesnum[namesloop]);
+        }
+    }
+
     public Map<String, Integer> get_name_table(){
         return name_table;
     }
@@ -340,6 +368,16 @@ public void compile_java(String pat){
             LibJavaPcre.INSTANCE.errorcleanup(p);
             throw new PatternSyntaxException(val, pattern, comp_val.erroroffset);
         }
+        else{
+            // Copy the group names into name_table, then always hand the native
+            // buffers back, even if the copy throws.
+            LibJavaPcre.GroupData.ByValue groupData = LibJavaPcre.INSTANCE.pcre2_get_info_group(re);
+            try {
+                set_name_table(groupData);
+            } finally {
+                LibJavaPcre.INSTANCE.free_group_data(groupData);
+            }
+        }
     }
 
     public boolean checkmatchoptionzero(){
@@ -354,7 +392,6 @@ public void singlematch_java(String a, int b){
         if (a == null) {
             throw new IllegalStateException("Subject is null");
         }
-        name_table = new LinkedHashMap<>();
         subject = a;
         offset = b;
         match_table = new LinkedHashMap<>();
@@ -395,13 +432,6 @@ public void singlematch_java(String a, int b){
             matchfound = true;
             final String[] regex_vals = regex_val.vals.getStringArray(0, regex_val.numVals);
             final int[] regex_ovector = regex_val.ovector.getIntArray(0, (regex_val.numVals + 2));
-            if (regex_val.namescount > 0) {
-                final String[] regex_names = regex_val.names.getStringArray(0, regex_val.namescount);
-                final int[] namesnum = regex_val.namesnum.getIntArray(0, regex_val.namescount);
-                for (int namesloop = 0; namesloop < regex_val.namescount; namesloop++) {
-                    name_table.put(regex_names[namesloop], namesnum[namesloop]);
-                }
-            }
             for (int regexloop = 0; regexloop < regex_val.numVals; regexloop++) {
                 match_table.put(ind++, regex_vals[regexloop]);
             }
@@ -421,6 +451,7 @@ public void jcompile_free(){
         if (re != null){
             LibJavaPcre.INSTANCE.pcre2_jcompile_free(re);
             re = null;
+            name_table.clear(); // the table was built from the compiled pattern that was just freed
         }else{
             throw new IllegalStateException("No data to free");
         }
diff --git a/src/test/java/com/teragrep/jpr_01/JavaPcreIT.java b/src/test/java/com/teragrep/jpr_01/JavaPcreIT.java
index 2c8be2b..a3d33ec 100644
--- a/src/test/java/com/teragrep/jpr_01/JavaPcreIT.java
+++ b/src/test/java/com/teragrep/jpr_01/JavaPcreIT.java
@@ -5,6 +5,7 @@
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Assertions;
 
+import java.util.HashMap;
 import java.util.Map;
 
 import org.slf4j.Logger;
@@ -164,6 +165,22 @@ void pcre2_compile_test() {
         Assertions.assertEquals(null, s1.get_re());
     }
 
+    @Test
+    void pcre2_compile_test_with_group_data() {
+        JavaPcre s1 = new JavaPcre();
+        s1.compile_java("From:(?<nimi>[^@]+)@(?<sposti>[^\r]+)");
+        Map<String, Integer> expected_nameTable = new HashMap<>();
+        expected_nameTable.put("nimi", 1);
+        expected_nameTable.put("sposti", 2);
+        Assertions.assertNotEquals(null, s1.get_re());
+        Map<String, Integer> nameTable = s1.get_name_table();
+        Assertions.assertEquals(2, nameTable.size());
+        Assertions.assertEquals(expected_nameTable, nameTable);
+        s1.jcompile_free();
+        Assertions.assertEquals(null, s1.get_re());
+        Assertions.assertEquals(0, nameTable.size());
+    }
+
     @Test
     void pcre2_translator_test() {
         // broken