Make the feed parsing more robust

This commit is contained in:
Aaron Mueller 2013-01-05 00:28:01 +01:00
parent 9ea67909cc
commit 85b78e6dc7
3 changed files with 16 additions and 10 deletions

View file

@ -118,7 +118,7 @@ GEM
sprockets (2.1.2) sprockets (2.1.2)
hike (~> 1.2) hike (~> 1.2)
rack (~> 1.0) rack (~> 1.0)
tilt (!= 1.3.0, ~> 1.1) tilt (~> 1.1, != 1.3.0)
sqlite3 (1.3.5) sqlite3 (1.3.5)
thor (0.14.6) thor (0.14.6)
tilt (1.3.3) tilt (1.3.3)

View file

@ -1,4 +1,5 @@
require "pp" require "pp"
class Feed < ActiveRecord::Base class Feed < ActiveRecord::Base
has_many :items has_many :items
@ -10,13 +11,17 @@ class Feed < ActiveRecord::Base
def fetch! def fetch!
Feedzirra::Feed.fetch_and_parse(url, Feedzirra::Feed.fetch_and_parse(url,
:on_success => lambda do |url, feed| :on_success => lambda do |url, feed|
feed.entries.each do |entry| begin
unless Item.exists?(:url => entry.url) feed.entries.each do |entry|
entry.sanitize! unless Item.exists?(:url => entry.url)
items << Item.create_from_feed_entry!(entry) entry.sanitize!
end items << Item.create_from_feed_entry!(entry)
end end
by_url(url).update_attribute(:has_errors, false) end
by_url(url).update_attribute(:has_errors, false)
rescue Exception => e
pp e
end
end, end,
:on_failure => lambda do |url, response_code, response_header, response_body| :on_failure => lambda do |url, response_code, response_header, response_body|
by_url(url).update_attribute(:has_errors, true) by_url(url).update_attribute(:has_errors, true)

View file

@ -9,12 +9,13 @@ class Item < ActiveRecord::Base
# Side-by-side diff hunk: on each line the old version comes first and the
# new version second.
#
# Item.create_from_feed_entry!(feed_entry) calls feed_entry.sanitize! and then
# passes the entry's title, url, author, published and content values to
# create! as :title, :url, :author, :published_at and :content.
#
# The new version adds fallbacks for missing values: a nil/false author
# becomes "Anonymous", and content falls back to feed_entry.summary and then
# to the placeholder "No content available".
def self.create_from_feed_entry!(feed_entry) def self.create_from_feed_entry!(feed_entry)
feed_entry.sanitize! feed_entry.sanitize!
self.create!( self.create!(
:title => feed_entry.title, :title => feed_entry.title,
:url => feed_entry.url, :url => feed_entry.url,
:author => feed_entry.author, :author => feed_entry.author || "Anonymous",
:published_at => feed_entry.published, :published_at => feed_entry.published,
:content => feed_entry.content :content => feed_entry.content || feed_entry.summary || "No content available"
) )
end end