Skip to content

Commit

Permalink
#467 - External parser regex groups named key and value are respectively
Browse files Browse the repository at this point in the history
used as metadata key and value. If those named groups are absent, groups 1 and 2 are used to
keep backward compatibility.
- Created regexes for Recycle Bin and Prefetch timestamp extraction.
- Created parsing logic on DateUtil class to parse Prefetch dates.
  • Loading branch information
patrickdalla committed Jun 16, 2023
1 parent 5b23fc8 commit 91a8ac6
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 4 deletions.
7 changes: 7 additions & 0 deletions iped-app/resources/config/conf/ExternalParsers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@
<mime-type>application/x-prefetch</mime-type>
</mime-types>
<output-charset>ISO-8859-1</output-charset>
<metadata>
<match>(?&lt;key&gt;[A-Za-z0-9\s]+):\s*(\d+\s*:\s*)?(?&lt;value&gt;((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s(0[1-9]|[1-2][0-9]|3[0-1]),?\s\d{4}\s([0-1][0-9]|2[0-3]):([0-5][0-9]):(([0-5][0-9])(\.\d*)?)\sUTC))</match>
</metadata>
</parser>

<parser>
Expand All @@ -94,6 +97,10 @@
</mime-types>
<output-charset>UTF-8</output-charset>
<firstLinesToIgnore>3</firstLinesToIgnore>

<metadata>
<match key="deleted">(\d{4}[:-](0[1-9]|1[0-2])[:-](0[1-9]|[1-2][0-9]|3[0-1])(\s|T)([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])Z?)</match>
</metadata>
</parser>

<parser>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata m
boolean inputToStdIn = true;
boolean outputFromStdOut = true;
boolean hasPatterns = (metadataPatterns != null && !metadataPatterns.isEmpty());


File outputFile = tmp.createTemporaryFile();

// Build our command
Expand Down Expand Up @@ -286,6 +286,9 @@ private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata m
// Execute
Process process = null;
try {
if(cmd[0].contains("sccainfo")) {
System.out.println();
}
if (cmd.length == 1) {
process = Runtime.getRuntime().exec(cmd[0], null, workDir);
} else {
Expand Down Expand Up @@ -515,7 +518,20 @@ private void extractMetadata(final InputStream stream, final Metadata metadata)
if (metadataPatterns.get(p) != null && !metadataPatterns.get(p).equals("")) {
metadata.add(metadataPatterns.get(p), m.group(1));
} else {
metadata.add(m.group(1), m.group(2));
String propertyName;
String value;
try {
propertyName = m.group("key");
propertyName=propertyName.replace(" ", "").replace("\t", "").replace("\n", "").replace("\r", "");
}catch (IllegalArgumentException iae) {
propertyName = m.group(1);
}
try {
value = m.group("value");
}catch (IllegalArgumentException iae) {
value = m.group(2);
}
metadata.add(parserName+":"+propertyName, value);
}
}
}
Expand Down
44 changes: 42 additions & 2 deletions iped-utils/src/main/java/iped/utils/DateUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ public class DateUtil {

private static final DateUtil INSTANCE = new DateUtil();

private static Pattern pattern;

private static DateFormat createDateFormat(String format, TimeZone timezone) {
final SimpleDateFormat sdf = new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
if (timezone != null) {
Expand All @@ -38,6 +40,8 @@ private static DateFormat createDateFormat(String format, TimeZone timezone) {
*/
private final List<DateFormat> iso8601InputFormats = loadDateFormats();

// Month-name date format used by tryToParseExt.
// "yyyy" is the calendar year; the previous "YYYY" is the week-based year, which makes
// SimpleDateFormat resolve the date from week fields instead of the parsed month and day.
// US symbols are given explicitly (as createDateFormat does) so English month abbreviations
// parse regardless of the JVM default locale.
private final DateFormat monthFormat = new SimpleDateFormat("MMM dd, yyyy HH:mm:ssZ", new DateFormatSymbols(Locale.US));

private List<DateFormat> loadDateFormats() {
List<DateFormat> dateFormats = new ArrayList<>();
// yyyy-mm-ddThh...
Expand Down Expand Up @@ -84,6 +88,26 @@ public Date tryToParse(String dateString) {
return null;
}

/**
 * Tries to parse a month-name style date string with {@code monthFormat};
 * returns null if no parse was possible.
 *
 * A fractional-seconds part (a dot, any digits after it and the single
 * whitespace character that follows) is removed before parsing.
 *
 * This is not thread safe! {@code monthFormat} is a shared
 * {@link SimpleDateFormat}, so callers must synchronize externally, as
 * {@link #tryToParseDate(String)} does.
 *
 * @param dateString the text to parse; may be null
 * @return the parsed date, or null when the input is null or cannot be parsed
 */
public Date tryToParseExt(String dateString) {
    if (dateString == null) {
        // Honour the "null when unparseable" contract instead of throwing an NPE.
        return null;
    }
    try {
        String withoutFraction = dateString.replaceAll("\\.\\d*\\s", "");// removes milliseconds
        return monthFormat.parse(withoutFraction);
    } catch (java.text.ParseException ignored) {
        // Deliberately ignored: an unparseable string is reported as null.
        return null;
    }
}

/**
* Thread-safe method internally synchronized
*
Expand All @@ -95,8 +119,15 @@ public static Date tryToParseDate(String val) {
synchronized (INSTANCE) {
return INSTANCE.tryToParse(val);
}
} else
return null;
} else {
if(val.length()>=21 && Character.isDigit(val.charAt(4)) && val.charAt(6)==',') {
synchronized (INSTANCE) {
return INSTANCE.tryToParseExt(val);
}
}else {
return null;
}
}
}

// Thread local variable
Expand All @@ -117,4 +148,13 @@ public static Date stringToDate(String date) throws ParseException {
return threadLocal.get().parse(date);
}

/**
 * Returns the shared pattern used to find date strings, compiling it on
 * first use and caching it in the static {@code pattern} field.
 *
 * The pattern has two alternatives: a numeric timestamp captured in the
 * named group {@code ISO} (four-digit year, month and day separated by
 * ':' or '-', a space or 'T', then HH:mm:ss and an optional 'Z'), or a
 * textual timestamp made of weekday abbreviation, month abbreviation, day,
 * HH:mm:ss with an optional 'Z', and a four-digit year.
 *
 * @return the cached date-string pattern
 */
public static Pattern getDateStrPattern() {
    if (pattern != null) {
        return pattern;
    }
    // Not yet compiled: build the expression from its two alternatives.
    final String regex = "(?<ISO>\\d{4}[:-](0[1-9]|1[0-2])[:-](0[1-9]|[1-2][0-9]|3[0-1])(\\s|T)([0-1][0-9]|2[0-3])\\:([0-5][0-9])\\:([0-5][0-9])Z?)"
            + "|((Mon|Tue|Wed|Thu|Fri|Sat|Sun)\\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s(0[1-9]|[1-2][0-9]|3[0-1])\\s([0-1][0-9]|2[0-3])\\:([0-5][0-9])\\:(([0-5][0-9])Z?)\\s\\d{4})";
    pattern = Pattern.compile(regex);
    return pattern;
}

}

0 comments on commit 91a8ac6

Please sign in to comment.