Skip to content

Commit

Permalink
Parsing feeds into structs
Browse files Browse the repository at this point in the history
  • Loading branch information
zetaben committed Aug 24, 2010
1 parent 68958bd commit d852815
Show file tree
Hide file tree
Showing 9 changed files with 326 additions and 2 deletions.
6 changes: 6 additions & 0 deletions lib/opds.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
require 'opds/opds'
require 'opds/support/logging'
require 'opds/support/browser'
require 'opds/support/linkset'
require 'opds/parser'

require 'opds/feed'
require 'opds/entry'
require 'opds/acquisition_feed'
require 'opds/navigation_feed'
4 changes: 4 additions & 0 deletions lib/opds/acquisition_feed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module OPDS
# Feed subclass the parser instantiates (via +from_nokogiri+) when a document
# is sniffed as :acquisition. It adds no behaviour of its own yet; everything
# is inherited from Feed.
class AcquisitionFeed < Feed
end
end
7 changes: 7 additions & 0 deletions lib/opds/entry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module OPDS
  # Placeholder structure for a single feed entry.
  class Entry
    class << self
      # Builds an entry from a parsed XML node or document.
      #
      # content:: the parsed XML; currently ignored, nothing is extracted yet.
      #
      # Returns a fresh, empty instance of the receiving class.
      def from_nokogiri(content)
        new
      end
    end
  end
end
125 changes: 125 additions & 0 deletions lib/opds/feed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
module OPDS
  # Base class for parsed feeds. Wraps the parsed XML document (+raw_doc+) and
  # exposes feed-level metadata, links, entries and pagination helpers.
  class Feed
    include Logging

    # The parsed XML document given to +from_nokogiri+.
    attr_reader :raw_doc
    # Array of OPDS::Entry objects built by +serialize!+.
    attr_reader :entries

    # browser:: object used to fetch other pages (must respond to +go_to+,
    #           +ok?+ and +body+); a new OPDS::Support::Browser is created
    #           when none is given.
    def initialize(browser=nil)
      @browser = browser || OPDS::Support::Browser.new
    end

    # access root catalog
    #
    # NOTE(review): @root is never assigned in this class and +root?+ has no
    # body, so this currently always returns nil.
    def root
      return @root unless root?
      self
    end

    # root catalog predicate
    #
    # TODO: not implemented yet; an empty body means it always returns nil.
    def root?
    end

    # Fetches +url+ and parses the response body.
    #
    # url::         address handed to the browser
    # browser::     optional browser; defaults to a new OPDS::Support::Browser
    # parser_opts:: options forwarded to the parser
    #
    # Returns whatever +parse_raw+ returns, or false when the browser reports
    # the request was not ok.
    def self.parse_url(url,browser=nil,parser_opts={})
      # Use a local rather than a class-level @browser so no state leaks
      # between calls.
      browser ||= OPDS::Support::Browser.new
      browser.go_to(url)
      return false unless browser.ok?
      parse_raw(browser.body, parser_opts)
    end

    # Parses raw feed text with OPDSParser and returns the parser's result.
    #
    # txt::  raw document text
    # opts:: options passed to OPDSParser.new
    def self.parse_raw(txt,opts={})
      OPDSParser.new(opts).parse(txt)
    end

    # Builds a feed around an already parsed document and loads its entries.
    #
    # content:: parsed XML document; stored as +raw_doc+
    def self.from_nokogiri(content)
      feed = new
      feed.instance_variable_set('@raw_doc', content)
      feed.serialize!
      feed
    end

    #read xml entries into entry struct
    #
    # Returns the new +entries+ array.
    def serialize!
      @entries = raw_doc.xpath('/xmlns:feed/xmlns:entry', raw_doc.root.namespaces).map do |el|
        OPDS::Entry.from_nokogiri(el)
      end
    end

    # Text of the feed's <title>, or nil when the element is absent.
    def title
      text_at('/xmlns:feed/xmlns:title')
    end

    # Text of the feed's <icon>, or nil when the element is absent.
    def icon
      text_at('/xmlns:feed/xmlns:icon')
    end

    # Feed-level links as an OPDS::Support::LinkSet (built once, then cached).
    #
    # A link whose +rel+ holds several whitespace-separated values is stored
    # once per value; a link without +rel+ is stored under a nil relation.
    def links
      return @links if @links

      set = OPDS::Support::LinkSet.new
      raw_doc.xpath('/xmlns:feed/xmlns:link', raw_doc.root.namespaces).each do |n|
        href = n.attributes['href']
        # A link without href cannot be followed; skip it instead of failing
        # on nil.
        next if href.nil?

        title_attr = n.attributes['title']
        text = title_attr && title_attr.value
        rel = n.attributes['rel']
        if rel.nil?
          set.push(nil, href.value, text)
        else
          rel.value.split.each { |r| set.push(r, href.value, text) }
        end
      end
      @links = set
    end

    # Text of the feed's <id>, or nil when the element is absent.
    def id
      text_at('/xmlns:feed/xmlns:id')
    end

    # Feed author as a hash with :name, :uri and :email keys. A value is nil
    # when the matching element is missing from the document.
    def author
      {
        :name => text_at('/xmlns:feed/xmlns:author/xmlns:name'),
        :uri => text_at('/xmlns:feed/xmlns:author/xmlns:uri'),
        :email => text_at('/xmlns:feed/xmlns:author/xmlns:email')
      }
    end

    # URL of the link with rel "next", or nil.
    def next_page_url
      links.link_url(:rel => 'next')
    end

    # URL of the previous page, or nil. "prev" is tried first, as before;
    # "previous" (the relation name used by RFC 5005 paged feeds) is the
    # fallback.
    def prev_page_url
      links.link_url(:rel => 'prev') || links.link_url(:rel => 'previous')
    end

    # True when the feed links to a next or a previous page.
    def paginated?
      !next_page_url.nil? || !prev_page_url.nil?
    end

    # True/false for paginated feeds; nil when the feed is not paginated.
    def first_page?
      !prev_page_url if paginated?
    end

    # True/false for paginated feeds; nil when the feed is not paginated.
    def last_page?
      !next_page_url if paginated?
    end

    # Fetches and parses the next page with this feed's browser.
    # Returns nil when the feed has no next link.
    def next_page
      url = next_page_url
      Feed.parse_url(url, @browser) unless url.nil?
    end

    # Fetches and parses the previous page with this feed's browser.
    # Returns nil when the feed has no previous link.
    def prev_page
      url = prev_page_url
      Feed.parse_url(url, @browser) unless url.nil?
    end

    private

    # Text of the first node matching +path+ (resolved with the root element's
    # namespaces), or nil when nothing matches.
    def text_at(path)
      node = raw_doc.at(path, raw_doc.root.namespaces)
      node && node.text
    end
  end
end
4 changes: 4 additions & 0 deletions lib/opds/navigation_feed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module OPDS
# Feed subclass the parser instantiates (via +from_nokogiri+) when a document
# is sniffed as :navigation. It adds no behaviour of its own yet; everything
# is inherited from Feed.
class NavigationFeed < Feed
end
end
7 changes: 6 additions & 1 deletion lib/opds/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@ def initialize(opts={})
# Parses +content+ as XML, records the sniffed document type and wraps the
# document in the matching structure.
#
# content:: raw XML text
#
# Returns an OPDS::AcquisitionFeed, OPDS::NavigationFeed or OPDS::Entry built
# with +from_nokogiri+, or nil when the sniffed type is none of
# :acquisition, :navigation or :entry.
def parse(content)
  @ret = Nokogiri::XML(content)
  @sniffed_type = sniff(@ret)
  wrapper =
    case @sniffed_type
    when :acquisition then OPDS::AcquisitionFeed
    when :navigation then OPDS::NavigationFeed
    when :entry then OPDS::Entry
    end
  wrapper.from_nokogiri(@ret) if wrapper
end

protected
def sniff(doc)
return :entry if doc.root.name=='entry'
Expand Down
90 changes: 90 additions & 0 deletions lib/opds/support/linkset.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
module OPDS
  module Support
    # Ordered collection of links that can be looked up by relation, URL or
    # text.
    #
    # Every link is kept as a three-element array <tt>[rel, url, text]</tt> in
    # insertion order. Three hashes map each rel / url / text value to the
    # positions of the links carrying that value.
    class LinkSet
      include Enumerable

      def initialize
        @rel_store = {}
        @txt_store = {}
        @lnk_store = {}
        @store = []
      end

      # Stores a link under relation +k+.
      #
      # k:: relation (may be nil)
      # v:: array whose first element is the URL and whose last element is
      #     the text
      def []=(k,v)
        @store.push([k] + v)
        i = @store.size - 1
        (@rel_store[k] ||= []).push(i)
        (@txt_store[v.last] ||= []).push(i)
        (@lnk_store[v.first] ||= []).push(i)
      end

      # All links stored under relation +k+, or nil when there are none.
      def [](k)
        remap(@rel_store[k])
      end

      # Yields each stored <tt>[rel, url, text]</tt> array in insertion order.
      def each(&block)
        @store.each(&block)
      end

      # Adds a link.
      #
      # rel::  relation (may be nil)
      # link:: URL
      # text:: optional title
      def push(rel,link,text=nil)
        self[rel] = [link, text]
      end

      # URL of the first link matching +k+, a one-pair hash such as
      # <tt>{:rel => 'next'}</tt> whose key is :rel, :link or :txt.
      # Returns nil when nothing matches; raises ArgumentError for any other
      # key.
      def link_url(k)
        first_match_field(k, 1)
      end

      # Relation of the first link matching +k+ (same argument as +link_url+).
      def link_rel(k)
        first_match_field(k, 0)
      end

      # Text of the first link matching +k+ (same argument as +link_url+).
      def link_text(k)
        first_match_field(k, 2)
      end

      # Number of stored links.
      def size
        @store.size
      end

      # Hash of index value => matching links for the index named +type+
      # (:link, :rel or :txt). Raises ArgumentError for any other type.
      def by(type)
        Hash[collection(type).map { |k, v| [k, remap(v)] }]
      end

      # Distinct URLs, in first-seen order.
      def links
        @lnk_store.keys
      end

      # Distinct relations, in first-seen order.
      def rels
        @rel_store.keys
      end

      # Distinct texts, in first-seen order (nil included when a link has no
      # text).
      def texts
        @txt_store.keys
      end

      protected

      # Index hash for +type+. An unknown type used to yield nil and surface
      # later as a NoMethodError on nil; it is now reported explicitly.
      def collection(type)
        case type.to_s
        when 'link' then @lnk_store
        when 'rel' then @rel_store
        when 'txt' then @txt_store
        else
          raise ArgumentError, "unknown link index #{type.inspect} (expected :link, :rel or :txt)"
        end
      end

      # Turns an array of positions into the stored links; nil for nil/empty.
      def remap(tab)
        return nil if tab.nil? || tab.size == 0
        tab.map { |i| @store[i] }
      end

      # Shared lookup behind link_url / link_rel / link_text: the element at
      # +position+ of the first link matching the single pair in +k+.
      def first_match_field(k, position)
        type, value = k.first
        found = remap(collection(type)[value])
        found.first[position] unless found.nil?
      end
    end
  end
end
34 changes: 34 additions & 0 deletions spec/linkset_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

describe OPDS::Support::LinkSet do
  # Six links: one root, three subsections, one shelf and one untitled
  # "related" link that shares the shelf URL.
  before(:each) do
    fixtures = [
      ['root', 'http://feedbooks.com', 'Racine'],
      ['subsection', 'http://feedbooks.com/publicdomain', 'Domaine pub'],
      ['subsection', 'http://feedbooks.com/original', 'Original'],
      ['subsection', 'http://feedbooks.com/feed', 'feeds'],
      ['http://opds-spec.org/shelf', 'http://feedbooks.com/shelf', 'shelf'],
      ['related', 'http://feedbooks.com/shelf', nil]
    ]
    fixtures.each { |rel, url, text| subject.push(rel, url, text) }
  end

  # Every pushed link is counted.
  it do
    subject.size.should be(6)
  end

  # Enumerable iteration visits every stored link.
  it do
    subject.map(&:first).size.should be(6)
  end

  it "should find 3 subsection" do
    subject['subsection'].size.should be(3)
    subject.by(:rel)['subsection'].size.should be(3)
  end

  it "should give root url" do
    root_url = 'http://feedbooks.com'
    subject.link_url(:rel => 'root').should == root_url
    subject.by(:rel)['root'].first[1].should == root_url
  end

  # Five distinct titles plus nil for the untitled link.
  it "get all text values" do
    subject.texts.size.should be(6)
  end
end
51 changes: 50 additions & 1 deletion spec/opdsparser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,58 @@ def sample(type)
lambda { subject.parse(sample(feed_type)) }.should_not raise_error
end

it "should should sniff entry" do
it "should sniff entry" do
subject.parse(sample(feed_type))
subject.sniffed_type.should be(feed_type)
end

it "should return an instance of the correct class " do
subject.parse(sample(feed_type)).class.should be({:entry => OPDS::Entry, :navigation => OPDS::NavigationFeed,:acquisition => OPDS::AcquisitionFeed}[feed_type])
end
end

# Examples run once per feed kind: the matching sample document is parsed and
# each feed-level accessor of the result is checked.
# NOTE(review): +sample+ is defined earlier in this file, outside the visible
# hunk; presumably it loads a fixture for the given type - confirm there.
[ :acquisition, :navigation].each do |feed_type|
it "should have a feed title" do
subject.parse(sample(feed_type)).title.size.should_not be(0)
end

it "should have a feed icon" do
subject.parse(sample(feed_type)).icon.size.should_not be(0)
end

it "should have feed links" do
subject.parse(sample(feed_type)).links.size.should_not be(0)
end

it "should have feed id" do
subject.parse(sample(feed_type)).id.size.should_not be(0)
end
# The author hash is expected to carry the same three values for both samples.
it "should have a feed author" do
auth=subject.parse(sample(feed_type)).author
auth[:name].should == ('Feedbooks')
auth[:uri].should == ('http://www.feedbooks.com')
auth[:email].should == ('support@feedbooks.com')
end

it "should have entries" do
subject.parse(sample(feed_type)).entries.size.should_not be(0)
end


end

# Pagination checks, all against the acquisition sample.

# be_paginated maps to Feed#paginated?.
it do
subject.parse(sample(:acquisition)).should be_paginated()
end

# be_first_page maps to Feed#first_page?.
it do
subject.parse(sample(:acquisition)).should be_first_page()
end

# Following the "next" link must not raise and must yield another
# acquisition feed.
# NOTE(review): next_page goes through Feed.parse_url, which asks the browser
# to fetch the URL - this presumably performs a real request; confirm against
# OPDS::Support::Browser.
it do
feed=nil
lambda { feed=subject.parse(sample(:acquisition)).next_page }.should_not raise_error

feed.class.should be(OPDS::AcquisitionFeed)
end
end

0 comments on commit d852815

Please sign in to comment.