Skip to content

Commit

Permalink
fix #44
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaohuo committed Sep 25, 2020
1 parent e637b49 commit 4ad42a4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
9 changes: 9 additions & 0 deletions src/main/java/org/seimicrawler/xpath/core/node/Text.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -59,6 +61,13 @@ public void head(Node node, int depth) {
}
Element data = new Element(Constants.DEF_TEXT_TAG_NAME);
data.text(textNode.getWholeText());
try {
Method parent = Node.class.getDeclaredMethod("setParentNode",Node.class);
parent.setAccessible(true);
parent.invoke(data,textNode.parent());
} catch (Exception e) {
//ignore
}
CommonUtil.setSameTagIndexInSiblings(data,index);
res.add(data);
}
Expand Down
23 changes: 22 additions & 1 deletion src/test/java/org/seimicrawler/xpath/JXDocumentTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,11 @@ public void testA(){
}
}

/**
* fix https://github.com/zhegexiaohuozi/JsoupXpath/issues/52
*/
@Test
public void FixTextBehaviorTest(){
public void fixTextBehaviorTest(){
String html = "<p><span class=\"text-muted\">分类:</span>动漫<span class=\"split-line\"></span><span class=\"text-muted hidden-xs\">地区:</span>日本<span class=\"split-line\"></span><span class=\"text-muted hidden-xs\">年份:</span>2010</p>";
JXDocument jxDocument = JXDocument.create(html);
List<JXNode> jxNodes = jxDocument.selN("//text()[3]");
Expand All @@ -235,4 +238,22 @@ public void FixTextBehaviorTest(){
logger.info("all = {}",allText);
}

/**
 * Regression test for https://github.com/zhegexiaohuozi/JsoupXpath/issues/44
 *
 * Selects every text() node beneath div[@class='a'] that has no
 * ancestor div[@class='e'], collects the distinct string values, and
 * checks that "not need" is absent, "need" is present, and exactly four
 * distinct values were produced.
 */
@Test
public void fixTextElNoParentTest(){
    String html = "<div class='a'> a <div>need</div> <div class='e'> not need</div> c </div>";
    String xpath = "//div[@class='a']//text()[not(ancestor::div[@class='e'])]";
    JXDocument document = JXDocument.create(html);
    List<JXNode> textNodes = document.selN(xpath);
    // A set is used so that only distinct text values are counted below.
    Set<String> distinctTexts = new HashSet<>();
    for (JXNode textNode : textNodes) {
        logger.info("{}", textNode.toString());
        distinctTexts.add(textNode.asString());
    }
    Assert.assertFalse(distinctTexts.contains("not need"));
    Assert.assertTrue(distinctTexts.contains("need"));
    Assert.assertEquals(4, distinctTexts.size());
}

}

0 comments on commit 4ad42a4

Please sign in to comment.