-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathat_css.rb
86 lines (40 loc) · 1.5 KB
/
at_css.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
require 'nokogiri'
require 'open-uri'
require 'net/http'
# uri = URI('http://example.com/index.html?count=10')
# Net::HTTP.get(uri) # => String
# Fetch the Baidu home page and print the text of each top navigation link.
url = 'http://www.baidu.com'

# Signature of the Nokogiri helper called below:
#   def HTML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
#     Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
#   end
#
# The second positional argument is the document URL, not the encoding. The
# previous call passed "GB18030" in that slot, so it was never used as an
# encoding. Pass the real URL there and leave the encoding unset, which keeps
# the encoding handling the script effectively had before. To force an
# encoding, supply it as the third argument instead.
#
# URI.open is used instead of the bare Kernel#open: open-uri no longer patches
# Kernel#open for URLs as of Ruby 3.0. The block form closes the handle once
# parsing is done, and the page is fetched once rather than twice.
data = URI.open(url) { |io| Nokogiri::HTML(io, url) }

nav_selector = 'div#wrapper div#head div.head_wrapper div#u1 a.mnav'
# puts data.at_css(nav_selector).text.strip # just the first a.mnav
# puts data.css(nav_selector).text.strip    # all a.mnav
doc = data.css(nav_selector)

# each, not map: the block is run only for its output, no result array is needed.
# Sample output when this was written (printed in Chinese):
# News, hao123, Map, Video, Tieba
doc.each { |d| puts d.content }

# Debug aid: dump the raw response line by line.
# URI.open(url) { |io| puts io.entries }
####
# Search for nodes by css
# doc.css('p > a').each do |a_tag|
# puts a_tag.content
# end
# ####
# # Search for nodes by xpath
# doc.xpath('//p/a').each do |a_tag|
# puts a_tag.content
# end
# ####
# # Or mix and match.
# doc.search('//p/a', 'p > a').each do |a_tag|
# puts a_tag.content
# end
# ###
# # Find attributes and their values
# doc.search('a').first['href']
# def rm_style doc
# doc.search("img").each{|i| i.set_attribute('class', nil)}
# end
# url = li.css("a").attr("href").to_s rescue nil # read the href attribute via the attr method
# published_at = Time.zone.parse(li.css("span").text) rescue nil # convert the string to a datetime
# Parse the fetched page as GB18030 and keep every <p> matched under
# #ContentBody except the last one ([0..-2] drops the final match).
#
# Two fixes over the previous form:
# - the encoding is passed as a String; the bare GB18030 was an undefined constant;
# - the selector is a plain CSS string; "#{ContentBody p}" tried to interpolate
#   Ruby code instead of producing the id selector.
#
# NOTE(review): http_get is not defined in the visible part of this file.
# Confirm it is provided elsewhere, or substitute Net::HTTP.get(URI(url)).
content = Nokogiri::HTML(http_get(url), nil, 'GB18030').css('#ContentBody p')[0..-2]