Skip to content

Commit

Permalink
'#2231 Creates merging code for parsers configuration from profiles.
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickdalla committed May 27, 2024
1 parent 0c7567c commit 1bd42f7
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 2 deletions.
10 changes: 10 additions & 0 deletions iped-app/resources/config/profiles/forensic/conf/ParserConfig.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>

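<!-- This config is merged with the default config XML: each parser listed below replaces any default entry with the same class, or is appended if there is none. -->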
<properties>

<parsers>
<parser class="org.apache.tika.parser.journal.JournalParser"></parser>
</parsers>

</properties>
102 changes: 100 additions & 2 deletions iped-engine/src/main/java/iped/engine/config/ParsersConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,34 @@

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream.Filter;
import java.nio.file.Files;
import java.nio.file.Path;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import iped.configuration.Configurable;

public class ParsersConfig implements Configurable<String> {
Expand All @@ -18,6 +41,8 @@ public class ParsersConfig implements Configurable<String> {

// File name of the parser configuration resource
// (presumably the name accept() matches against - confirm).
private static final String PARSER_CONFIG = "ParserConfig.xml"; //$NON-NLS-1$

// Attribute name that marks a parser element as disabled: parser elements
// carrying this attribute with the value "true" are stripped by
// removeDisabledParsers before the temporary config file is written.
public static final String PARSER_DISABLED_ATTR = "iped:disabled";

// Current parser configuration as XML text; null until the first
// processConfig or setConfiguration call assigns it.
private String parserConfigXml;
// Temporary file holding the configuration, created by getTmpConfigFile
// while this is still null.
private transient Path tmp;

Expand All @@ -33,7 +58,57 @@ public boolean accept(Path entry) throws IOException {

/**
 * Loads a parser configuration resource.
 * <p>
 * The first resource processed becomes the base configuration. Every later
 * resource is merged into it: each {@code <parser>} element found directly
 * under the resource's first {@code <parsers>} element replaces the base
 * entries that have the same {@code class} attribute, or is appended when
 * the base has no such entry. The merged document is stored back as XML text
 * without an XML declaration.
 *
 * @param resource path of the configuration file to load or merge
 * @throws IOException if the file cannot be read, or if either document
 *                     cannot be parsed, merged or serialized
 */
@Override
public void processConfig(Path resource) throws IOException {
    if (parserConfigXml == null) {
        parserConfigXml = new String(Files.readAllBytes(resource), StandardCharsets.UTF_8);
        // drop anything (e.g. a BOM) that precedes the first '<'
        parserConfigXml = parserConfigXml.trim().replaceFirst("^([\\W]+)<", "<");
        return;
    }

    try {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance();
        // prefixed attributes such as iped:disabled are kept as plain names
        dbf.setNamespaceAware(false);
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new InputSource(new StringReader(parserConfigXml)));
        Document changedDoc = db.parse(resource.toFile());

        // both lookups are loop invariant, so they are resolved only once
        XPath xPath = XPathFactory.newInstance().newXPath();
        Node targetParsers = (Node) xPath.evaluate("/properties/parsers", doc, XPathConstants.NODE);
        Node sourceParsers = changedDoc.getDocumentElement().getElementsByTagName("parsers").item(0);

        // a resource without <parsers>, or a base without it, leaves nothing to merge
        if (targetParsers != null && sourceParsers != null) {
            NodeList overrides = sourceParsers.getChildNodes();
            for (int i = 0; i < overrides.getLength(); i++) {
                Node child = overrides.item(i);
                if (!(child instanceof Element)) {
                    continue;
                }
                Element element = (Element) child;
                if (!element.getTagName().equals("parser")) {
                    continue;
                }
                // attributes are compared directly instead of building an XPath
                // string, so class names containing quotes cannot break the merge
                removeParsersByClass(targetParsers, element.getAttribute("class"));
                targetParsers.appendChild(doc.importNode(element, true));
            }
        }

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        StringWriter writer = new StringWriter();
        transformer.transform(new DOMSource(doc), new StreamResult(writer));
        parserConfigXml = writer.getBuffer().toString();
    } catch (ParserConfigurationException | SAXException | XPathExpressionException | TransformerException e) {
        // fail loudly: silently ignoring a profile's parser configuration hides misconfiguration
        throw new IOException("Error merging parser configuration from " + resource, e);
    }
}

/**
 * Removes every direct {@code <parser>} child of {@code parsers} whose
 * {@code class} attribute equals {@code className}.
 */
private static void removeParsersByClass(Node parsers, String className) {
    NodeList existing = parsers.getChildNodes();
    // the NodeList is live: walk backwards so removals do not shift pending indexes
    for (int k = existing.getLength() - 1; k >= 0; k--) {
        Node candidate = existing.item(k);
        if (candidate instanceof Element) {
            Element parser = (Element) candidate;
            if ("parser".equals(parser.getTagName()) && parser.hasAttribute("class")
                    && className.equals(parser.getAttribute("class"))) {
                parsers.removeChild(parser);
            }
        }
    }
}

@Override
Expand All @@ -45,12 +120,35 @@ public String getConfiguration() {
public void setConfiguration(String config) {
    // replaces the stored parser configuration XML text as-is
    this.parserConfigXml = config;
}

/**
 * Returns a copy of the given XML text without the parser elements that are
 * flagged as disabled, i.e. that carry the {@link #PARSER_DISABLED_ATTR}
 * attribute with the value {@code "true"}.
 * <p>
 * A disabled element spans from the closest preceding {@code <parser} up to
 * the end of its self-closing tag, or up to and including the first
 * following {@code </parser>}. This is a textual scan, not an XML parse, so
 * a disabled parser that itself contains non-self-closing {@code <parser>}
 * children is cut at the first closing tag.
 *
 * @param parserConfigXml the parser configuration XML text
 * @return the XML text with disabled parser elements removed; if a flagged
 *         element cannot be delimited, the text from that point on is
 *         returned unchanged
 */
public String removeDisabledParsers(String parserConfigXml) {
    final String disabledMarker = PARSER_DISABLED_ATTR + "=\"true\"";
    final String openTag = "<parser";
    final String closeTag = "</parser>";

    StringBuilder result = new StringBuilder(parserConfigXml.length());
    int copyFrom = 0;
    int markerIndex = parserConfigXml.indexOf(disabledMarker);
    while (markerIndex >= 0) {
        int elementStart = parserConfigXml.lastIndexOf(openTag, markerIndex);
        int tagEnd = parserConfigXml.indexOf('>', markerIndex + disabledMarker.length());
        if (elementStart < copyFrom || tagEnd < 0) {
            // no opening tag or no tag end for this marker: keep the rest untouched
            break;
        }

        int elementEnd;
        if (parserConfigXml.charAt(tagEnd - 1) == '/') {
            // self-closing element: it ends right after "/>"
            elementEnd = tagEnd + 1;
        } else {
            int closeIndex = parserConfigXml.indexOf(closeTag, tagEnd);
            if (closeIndex < 0) {
                break;
            }
            // skip the whole closing tag, not only its first character
            elementEnd = closeIndex + closeTag.length();
        }

        result.append(parserConfigXml, copyFrom, elementStart);
        copyFrom = elementEnd;
        markerIndex = parserConfigXml.indexOf(disabledMarker, copyFrom);
    }
    result.append(parserConfigXml, copyFrom, parserConfigXml.length());
    return result.toString();
}

public synchronized File getTmpConfigFile() {
if (tmp == null) {
try {
tmp = Files.createTempFile("parser-config", ".xml");
Files.write(tmp, parserConfigXml.getBytes(StandardCharsets.UTF_8));

Files.write(tmp, removeDisabledParsers(parserConfigXml).getBytes(StandardCharsets.UTF_8));
tmp.toFile().deleteOnExit();

} catch (IOException e) {
Expand Down

0 comments on commit 1bd42f7

Please sign in to comment.