Skip to content

Commit

Permalink
Merge pull request #2329 from sparklemotion/flavorjones-GHSA-2rr5-8q3…
Browse files Browse the repository at this point in the history
…7-2w7h_1.12.x

fix JRuby SAX parser entity handling (v1.12.x backport)
  • Loading branch information
flavorjones authored Sep 26, 2021
2 parents 01e1618 + 4bd943c commit 6b60637
Show file tree
Hide file tree
Showing 12 changed files with 804 additions and 868 deletions.
33 changes: 14 additions & 19 deletions ext/java/nokogiri/Html4SaxPushParser.java
Original file line number Diff line number Diff line change
@@ -1,31 +1,26 @@
package nokogiri;

import static nokogiri.XmlSaxPushParser.terminateExecution;
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static org.jruby.runtime.Helpers.invoke;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ThreadFactory;

import nokogiri.internals.*;

import nokogiri.internals.ClosedStreamException;
import nokogiri.internals.NokogiriBlockingQueueInputStream;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.ParserContext;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.*;

import static nokogiri.XmlSaxPushParser.terminateExecution;
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static org.jruby.runtime.Helpers.invoke;

/**
* Class for Nokogiri::HTML4::SAX::PushParser
*
Expand Down Expand Up @@ -134,7 +129,7 @@ public class Html4SaxPushParser extends RubyObject

if (!options.recover && parserTask.getErrorCount() > errorCount0) {
terminateTask(context.runtime);
throw parserTask.getLastError();
throw parserTask.getLastError().toThrowable();
}

return this;
Expand Down
124 changes: 31 additions & 93 deletions ext/java/nokogiri/XmlSaxParserContext.java
Original file line number Diff line number Diff line change
@@ -1,33 +1,23 @@
package nokogiri;

import static org.jruby.runtime.Helpers.invoke;

import java.io.IOException;
import java.io.InputStream;

import nokogiri.internals.*;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyFixnum;
import org.jruby.RubyModule;
import org.jruby.RubyObjectAdapter;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.exceptions.RaiseException;
import org.jruby.javasupport.JavaEmbedUtils;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;

import nokogiri.internals.NokogiriHandler;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.ParserContext;
import nokogiri.internals.XmlSaxParser;
import java.io.IOException;
import java.io.InputStream;

import static org.jruby.runtime.Helpers.invoke;

/**
* Base class for the SAX parsers.
Expand All @@ -51,6 +41,7 @@ public class XmlSaxParserContext extends ParserContext
protected AbstractSAXParser parser;

protected NokogiriHandler handler;
protected NokogiriErrorHandler errorHandler;
private boolean replaceEntities = true;
private boolean recovery = false;

Expand Down Expand Up @@ -168,31 +159,12 @@ public class XmlSaxParserContext extends ParserContext
return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
}

/**
* Set a property of the underlying parser.
*
* Delegates directly to <code>parser.setProperty(key, val)</code>; nothing
* is validated or translated here.
*
* @param key name of the parser property to set
* @param val value to assign to that property
* @throws SAXNotRecognizedException declared by the underlying call
* @throws SAXNotSupportedException declared by the underlying call
*/
protected void
setProperty(String key, Object val)
throws SAXNotRecognizedException, SAXNotSupportedException
{
parser.setProperty(key, val);
}

/**
* Install <code>handler</code> as the content handler of the underlying
* parser.
*
* @param handler content handler passed straight to the parser
*/
protected void
setContentHandler(ContentHandler handler)
{
parser.setContentHandler(handler);
}

/**
* Install <code>handler</code> as the error handler of the underlying
* parser.
*
* @param handler error handler passed straight to the parser
*/
protected void
setErrorHandler(ErrorHandler handler)
{
parser.setErrorHandler(handler);
}

/**
* Accessor for the <code>handler</code> field.
*
* @return the current value of <code>handler</code>
*/
public final NokogiriHandler
getNokogiriHandler() { return handler; }

/**
* Accessor for the <code>errorHandler</code> field.
*
* @return the current value of <code>errorHandler</code>
*/
public final NokogiriErrorHandler
getNokogiriErrorHandler() { return errorHandler; }

/**
* Perform any initialization prior to parsing with the handler
* <code>handlerRuby</code>. Convenience hook for subclasses.
Expand Down Expand Up @@ -223,6 +195,17 @@ public class XmlSaxParserContext extends ParserContext
parser.parse(getInputSource());
}

/**
 * Builds the parse options used when the caller supplies none.
 *
 * Looks up the Ruby constant <code>Nokogiri::XML::ParseOptions::DEFAULT_XML</code>,
 * calls <code>to_i</code> on it, and wraps the resulting integer in a
 * {@link ParserContext.Options}.
 *
 * @param context the current thread context, used for the runtime lookup and
 *                the Ruby method call
 * @return options constructed from the integer value of DEFAULT_XML
 */
protected static Options
defaultParseOptions(ThreadContext context)
{
  RubyClass parseOptionsClass =
    (RubyClass) context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions");
  IRubyObject defaultXml = parseOptionsClass.getConstant("DEFAULT_XML");
  long optionBits = RubyFixnum.fix2long(Helpers.invoke(context, defaultXml, "to_i"));
  return new ParserContext.Options(optionBits);
}

@JRubyMethod
public IRubyObject
parse_with(ThreadContext context, IRubyObject handlerRuby)
Expand All @@ -233,14 +216,19 @@ public class XmlSaxParserContext extends ParserContext
throw runtime.newArgumentError("argument must respond_to document");
}

NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby);
preParse(runtime, handlerRuby, handler);
/* TODO: how should we pass in parse options? */
ParserContext.Options options = defaultParseOptions(context);

errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning);
handler = new NokogiriHandler(runtime, handlerRuby, errorHandler);

setContentHandler(handler);
setErrorHandler(handler);
preParse(runtime, handlerRuby, handler);
parser.setContentHandler(handler);
parser.setErrorHandler(handler);
parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));

try {
setProperty("http://xml.org/sax/properties/lexical-handler", handler);
parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
} catch (Exception ex) {
throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
}
Expand Down Expand Up @@ -270,8 +258,6 @@ public class XmlSaxParserContext extends ParserContext

postParse(runtime, handlerRuby, handler);

//maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);

return runtime.getNil();
}

Expand Down Expand Up @@ -319,53 +305,6 @@ public class XmlSaxParserContext extends ParserContext
return context.runtime.newBoolean(recovery);
}

/**
 * When the <code>@document</code> of the given Ruby parser object is a
 * Nokogiri::XML::FragmentHandler, unlinks blank nodes from the start and the
 * end of the first entry on that handler's <code>@stack</code>, then
 * normalizes that entry.
 *
 * Returns without doing anything if <code>@document</code> is missing, nil or
 * not a FragmentHandler, or if <code>@stack</code> or its first entry is
 * missing or nil.
 *
 * This reaches into FragmentHandler's instance variables, so it is tightly
 * coupled to that class's internals.
 *
 * @param context the current thread context, used to resolve the
 *                FragmentHandler class
 * @param parser  the Ruby object whose <code>@document</code> is inspected
 */
protected void
maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser)
{
  RubyObjectAdapter rubyAdapter = JavaEmbedUtils.newObjectAdapter();
  RubyModule fragmentHandlerClass =
    context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler");

  IRubyObject document = rubyAdapter.getInstanceVariable(parser, "@document");
  boolean isFragmentHandler = document != null
                              && !document.isNil()
                              && rubyAdapter.isKindOf(document, fragmentHandlerClass);
  if (!isFragmentHandler) {
    return;
  }

  IRubyObject nodeStack = rubyAdapter.getInstanceVariable(document, "@stack");
  if (nodeStack == null || nodeStack.isNil()) {
    return;
  }

  // The bottom of the stack is expected to be the fragment whose children get trimmed.
  IRubyObject fragment = rubyAdapter.callMethod(nodeStack, "first");
  if (fragment == null || fragment.isNil()) {
    return;
  }

  unlinkBlankEdgeNodes(rubyAdapter, fragment, "first");
  unlinkBlankEdgeNodes(rubyAdapter, fragment, "last");

  // While we have the fragment in hand, normalize it.
  ((XmlNode) fragment).normalize();
}

/**
 * Repeatedly fetches <code>fragment.children.&lt;edge&gt;</code> and unlinks
 * it for as long as NokogiriHelpers.isBlank reports it blank.
 */
private static void
unlinkBlankEdgeNodes(RubyObjectAdapter rubyAdapter, IRubyObject fragment, String edge)
{
  while (true) {
    IRubyObject childNodes = rubyAdapter.callMethod(fragment, "children");
    IRubyObject edgeNode = rubyAdapter.callMethod(childNodes, edge);
    if (!NokogiriHelpers.isBlank(edgeNode)) {
      return;
    }
    rubyAdapter.callMethod(edgeNode, "unlink");
  }
}

@JRubyMethod(name = "column")
public IRubyObject
column(ThreadContext context)
Expand All @@ -383,5 +322,4 @@ public class XmlSaxParserContext extends ParserContext
if (number == null) { return context.getRuntime().getNil(); }
return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
}

}
42 changes: 17 additions & 25 deletions ext/java/nokogiri/XmlSaxPushParser.java
Original file line number Diff line number Diff line change
@@ -1,32 +1,24 @@
package nokogiri;

import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static org.jruby.runtime.Helpers.invoke;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ThreadFactory;

import nokogiri.internals.*;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyException;
import org.jruby.RubyObject;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;

import nokogiri.internals.ClosedStreamException;
import nokogiri.internals.NokogiriBlockingQueueInputStream;
import nokogiri.internals.NokogiriHandler;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.ParserContext;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.concurrent.*;

import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static org.jruby.runtime.Helpers.invoke;

/**
* Class for Nokogiri::XML::SAX::PushParser
Expand Down Expand Up @@ -159,7 +151,8 @@ public class XmlSaxPushParser extends RubyObject

if (!options.recover && parserTask.getErrorCount() > errorCount0) {
terminateTask(context.runtime);
throw ex = parserTask.getLastError();
ex = parserTask.getLastError().toThrowable();
throw ex;
}

return this;
Expand Down Expand Up @@ -278,16 +271,15 @@ static class ParserTask extends ParserContext.ParserTask<XmlSaxParserContext>
getErrorCount()
{
// check for null because thread may not have started yet
if (parser.getNokogiriHandler() == null) { return 0; }
return parser.getNokogiriHandler().getErrorCount();
if (parser.getNokogiriErrorHandler() == null) { return 0; }
return parser.getNokogiriErrorHandler().getErrors().size();
}

synchronized final RaiseException
synchronized final RubyException
getLastError()
{
return parser.getNokogiriHandler().getLastError();
List<RubyException> errors = parser.getNokogiriErrorHandler().getErrors();
return errors.get(errors.size() - 1);
}

}

}
2 changes: 1 addition & 1 deletion ext/java/nokogiri/internals/NokogiriEntityResolver.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public class NokogiriEntityResolver implements EntityResolver2
private void
addError(String errorMessage)
{
if (handler != null) { handler.errors.add(new Exception(errorMessage)); }
if (handler != null) { handler.addError(new Exception(errorMessage)); }
}

/**
Expand Down
37 changes: 29 additions & 8 deletions ext/java/nokogiri/internals/NokogiriErrorHandler.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package nokogiri.internals;

import java.util.ArrayList;
import java.util.List;

import nokogiri.XmlSyntaxError;
import org.apache.xerces.xni.parser.XMLErrorHandler;
import org.jruby.Ruby;
import org.jruby.RubyException;
import org.jruby.exceptions.RaiseException;
import org.xml.sax.ErrorHandler;

import java.util.ArrayList;
import java.util.List;

/**
* Super class of error handlers.
*
Expand All @@ -17,23 +21,40 @@
*/
public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler
{
protected final List<Exception> errors;
private final Ruby runtime;
protected final List<RubyException> errors;
protected boolean noerror;
protected boolean nowarning;

public
NokogiriErrorHandler(boolean noerror, boolean nowarning)
NokogiriErrorHandler(Ruby runtime, boolean noerror, boolean nowarning)
{
this.errors = new ArrayList<Exception>(4);
this.runtime = runtime;
this.errors = new ArrayList<RubyException>(4);
this.noerror = noerror;
this.nowarning = nowarning;
}

List<Exception>
public List<RubyException>
getErrors() { return errors; }

public void
addError(Exception ex) { errors.add(ex); }
addError(Exception ex)
{
addError(XmlSyntaxError.createXMLSyntaxError(runtime, ex));
}

/**
* Append the given Ruby exception to the <code>errors</code> list.
*
* @param ex the exception to record
*/
public void
addError(RubyException ex)
{
errors.add(ex);
}

/**
* Record the Ruby exception carried by a RaiseException.
*
* Unwraps <code>ex</code> with <code>getException()</code> and hands the
* result to another <code>addError</code> overload.
*
* @param ex the raise exception whose underlying exception is recorded
*/
public void
addError(RaiseException ex)
{
addError(ex.getException());
}

protected boolean
usesNekoHtml(String domain)
Expand Down
Loading

0 comments on commit 6b60637

Please sign in to comment.