Skip to content

V2.5.3

Latest
Compare
Choose a tag to compare
@zhegexiaohuozi zhegexiaohuozi released this 07 Mar 04:25
· 5 commits to master since this release

优化 following-sibling、following、preceding-sibling、preceding 轴的行为,以便更好地适配文本提取场景,示例如下:

    /**
     * Checks the {@code following-sibling}, {@code preceding-sibling},
     * {@code following} and {@code preceding} axes against a small HTML
     * fragment that mixes element nodes with bare text nodes ("11", "12")
     * sitting between sibling elements.
     *
     * <p>Single-result expectations go through {@code selNOne}; multi-result
     * expectations go through {@code selN} and are compared as one
     * space-joined string (see {@link #joinedText(JXDocument, String)}).
     */
    @Test
    public void issue64And65() {
        // Fixture: some <div>s are separated by "\n" text nodes, and "11"/"12"
        // are loose text nodes that are siblings of the surrounding elements.
        String content = "<div class='a'>1</div>" +
                "<div>2</div>\n" +
                "<div class='a'>3</div>\n" +
                "<div>4</div>\n" +
                "<div>5</div>11" +
                "<tag>6</tag>12" +
                "<div>7<span>8</span></div>";
        JXDocument j = JXDocument.create(content);

        // following-sibling: first matching element / text node after <div>5</div>.
        Assert.assertEquals("7", j.selNOne("//div[text()='5']/following-sibling::div/text()").asString());
        Assert.assertEquals("6", j.selNOne("//div[text()='5']/following-sibling::tag/text()").asString());
        Assert.assertEquals("11", j.selNOne("//div[text()='5']/following-sibling::text()").asString());

        // preceding-sibling: first matching element / text node before <div>7...</div>.
        Assert.assertEquals("12", j.selNOne("//div[text()='7']/preceding-sibling::text()").asString());
        Assert.assertEquals("5", j.selNOne("//div[text()='7']/preceding-sibling::div/text()").asString());
        Assert.assertEquals("6", j.selNOne("//div[text()='7']/preceding-sibling::tag/text()").asString());

        // following: everything after the context node, including nested <span>8</span>.
        Assert.assertEquals("11 6 12 7 8", joinedText(j, "//div[text()='5']/following::text()"));
        Assert.assertEquals("6", joinedText(j, "//div[text()='5']/following::tag/text()"));
        Assert.assertEquals("8", joinedText(j, "//div[text()='5']/following::span/text()"));
        Assert.assertEquals("5 7", joinedText(j, "//div[text()='4']/following::div/text()"));

        // preceding: results are expected nearest-first; the double space in
        // "3  2 1" comes from a whitespace-only text node in the result list.
        Assert.assertEquals("2 1", joinedText(j, "//div[text()='3']/preceding::text()"));
        Assert.assertEquals("3  2 1", joinedText(j, "//div[text()='4']/preceding::text()"));
    }

    /**
     * Evaluates {@code xpath} with {@code selN} and renders the result list as
     * a single string: each node's {@code toString()} joined by one space,
     * then trimmed at both ends.
     *
     * @param doc   document to query
     * @param xpath XPath expression to evaluate
     * @return the space-joined, trimmed string form of all selected nodes
     */
    private static String joinedText(JXDocument doc, String xpath) {
        return doc.selN(xpath).stream()
                .map(Objects::toString)
                .collect(Collectors.joining(" "))
                .trim();
    }

以及豆瓣详情页提取测试:

    /**
     * Pulls four fields out of the {@code d_detail_page.html} resource: the
     * score, the title, and the text nodes that follow the "页数" (page count)
     * and "定价" (list price) label spans inside {@code #info}.
     *
     * <p>All four values are logged; only the page count ("956") and the
     * price ("139.00元") are asserted.
     *
     * @throws Exception propagated from loading the resource or evaluating XPath
     */
    @Test
    public void testDoubanDetailInfoExtra() throws Exception{
        // XPath expressions, named up front so the query/log/assert flow below stays readable.
        final String scoreXpath = "//*[@id=\"interest_sectl\"]/div/div[2]/strong/text()";
        final String titleXpath = "//*[@id=\"wrapper\"]/h1/span/text()";
        // The values are bare text nodes that are siblings of their label <span>.
        final String pageCountXpath = "//*[@id=\"info\"]/span[contains(text(),'页数')]/following-sibling::text()";
        final String priceXpath = "//*[@id=\"info\"]/span[contains(text(),'定价')]/following-sibling::text()";

        JXDocument page = createFromResource("d_detail_page.html");

        JXNode scoreNode = page.selNOne(scoreXpath);
        logger.info("{}", scoreNode.asString());

        JXNode titleNode = page.selNOne(titleXpath);
        logger.info("{}", titleNode.asString());

        JXNode pageCountNode = page.selNOne(pageCountXpath);
        logger.info("{}", pageCountNode.asString());
        Assert.assertEquals("956", pageCountNode.asString());

        JXNode priceNode = page.selNOne(priceXpath);
        logger.info("{}", priceNode.asString());
        Assert.assertEquals("139.00元", priceNode.asString());
    }