RSS, Ruby, & the Web

by Dave Thomas



Listing One 



require 'webrick'

# Serve the files under "html" over HTTP on port 2000.
server = WEBrick::HTTPServer.new(
  :Port         => 2000,
  :DocumentRoot => "html"
)

# Ask the server to shut down when the process receives SIGINT (Ctrl-C).
Signal.trap("INT") do
  server.shutdown
end

# Enter the request loop.
server.start





Listing Two 



# An article backed by a single HTML file.
#
# file_name - the path the article was loaded from, exactly as given
# title     - text of the first non-empty <title> (or, failing that, <h1>)
#             element, with surrounding whitespace removed; falls back to
#             the file name when neither is present
# body      - the markup between <body ...> and the last </body>, or the
#             whole file when it contains no body element
class Article
  # Candidate title elements, in order of preference. Matching is
  # case-insensitive so that <TITLE> and <H1> are recognised too.
  TITLE_PATTERNS = [
    %r{<title\b[^>]*>(.*?)</title}mi,
    %r{<h1\b[^>]*>(.*?)</h1}mi
  ].freeze

  # Greedy capture so the body runs up to the *last* closing body tag.
  BODY_PATTERN = %r{<body\b[^>]*>(.*)</body}mi

  attr_reader :file_name, :title, :body

  # Reads +file_name+ and extracts the title and body from its contents.
  # Raises whatever File.read raises if the file cannot be read.
  def initialize(file_name)
    html = File.read(file_name)

    @file_name = file_name
    @title     = extract_title(html) || file_name
    @body      = extract_body(html)
  end

  private

  # Returns the first non-empty title candidate, or nil when there is none.
  def extract_title(html)
    TITLE_PATTERNS.each do |pattern|
      match = pattern.match(html)
      next unless match

      text = match[1].strip
      return text unless text.empty?
    end
    nil
  end

  # Returns only the captured body content. Substituting in place would
  # leave everything that precedes <body> (doctype, <head>, ...) in the
  # result, so the capture is taken directly instead.
  def extract_body(html)
    match = BODY_PATTERN.match(html)
    match ? match[1] : html
  end
end





Listing Three 



# Returns Article objects for the most recently modified HTML files
# found anywhere under +dir+, newest first.
#
# dir   - directory that is searched recursively for "*.html" files
# limit - maximum number of articles to return (defaults to 10)
#
# Each article's file_name is the path relative to +dir+.
def Article.list(dir, limit = 10)
  # The glob results are relative paths, so Article.new must also run
  # with +dir+ as the working directory; the block form of chdir
  # restores the previous directory afterwards.
  Dir.chdir(dir) do
    oldest_first = Dir.glob("**/*.html").sort_by { |name| File.stat(name).mtime }

    oldest_first.reverse.first(limit).map { |file_name| Article.new(file_name) }
  end
end





Listing Four 



# Servlet that renders the most recent articles as one HTML page.
class BlogServlet < WEBrick::HTTPServlet::AbstractServlet

  # Fixed <head> section used for every generated page.
  HEAD = "<head><title>Simple Blog</title></head>"

  # Responds to GET with the bodies of the articles found under the
  # server's document root, separated by horizontal rules.
  def do_GET(req, res)
    doc_root = @server.config[:DocumentRoot]
    sections = Article.list(doc_root).map { |article| article.body }

    res.body = "<html>#{HEAD}<body>#{sections.join("<hr />")}</body></html>"
  end
end

# Expose the servlet at /blog.
server.mount("/blog", BlogServlet)





Listing Five 



<rss version="0.91">

  <channel>

    <title>BBC News | News Front Page | World Edition</title>

    <link>

      http://news.bbc.co.uk/go/click/rss/0.91/public/-/2/hi/default.stm

    </link>

    <description>

      Updated every minute of every day - FOR PERSONAL USE ONLY

    </description>

    <language>en-gb</language>

    <lastBuildDate>Tue, 21 Sep 04 21:46:23 GMT</lastBuildDate>

    <copyright>

      Copyright: (C) British Broadcasting Corporation,

      http://news.bbc.co.uk/1/hi/help/3281849.stm

    </copyright>

    <docs>http://www.bbc.co.uk/syndication/</docs>

    <image>

      <title>BBC News</title>

      <url>

        http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif

      </url>

      <link>http://news.bbc.co.uk</link>

    </image>

    <!-- list of items ... -->

  </channel>

</rss>





Listing Six 



# Servlet that publishes the most recent articles as an RSS feed.
class RssServlet < WEBrick::HTTPServlet::AbstractServlet

  # Responds to GET with an RSS document containing one item per article
  # found under the server's document root. Every link is built from the
  # scheme, host and port of the incoming request.
  def do_GET(req, res)
    # Work on a copy: the URI object belongs to the request and must not
    # be modified. The query string is dropped so that parameters of the
    # feed request do not end up in the generated links.
    base_uri = req.request_uri.dup
    base_uri.query = nil
    base_uri.path  = ""       # link back to the top-level

    # NOTE(review): "0.9" is passed through unchanged as the feed version
    # string; confirm it is the value the RSS library expects.
    rss  = RSS::Rss.new("0.9")
    chan = RSS::Rss::Channel.new
    chan.title = "My Blog 99"
    chan.link  = base_uri.to_s
    rss.channel = chan

    Article.list(@server.config[:DocumentRoot]).each do |article|
      # Fresh copy per item so one item's path never affects another.
      item_uri = base_uri.dup
      item_uri.path = "/" + article.file_name

      item = RSS::Rss::Channel::Item.new
      item.link        = item_uri.to_s
      item.title       = article.title
      item.description = article.body
      chan.items << item
    end

    res['Content-Type'] = "text/xml"
    res.body = rss.to_s
  end
end

# Expose the feed at /rss.
server.mount("/rss", RssServlet)





Listing Seven



require 'rss/2.0'

require 'etc'

DROP_DIR = "/var/www/pragprog/data/rss" # Where we create our RSS files

MAX_TO_KEEP = 10 # Max entries in the RSS file

# The first command-line argument is the directory that was committed to;
# only its top-level component (the project name) is used to name the feed.
dir  = ARGV.shift
repo = dir.to_s.split(%r{/})[0]

# Without a usable argument there is no project to name the feed after.
abort "usage: #{$0} <directory>" if repo.nil?

FILENAME = File.join(DROP_DIR, repo + ".rss")

# Read in existing rss (we'll write it out again to the new file).
# Any failure (for example, the file not existing yet) is treated as
# "no previous entries".
begin
  existing_data = RSS::Parser.parse(File.read(FILENAME), false)
rescue
  existing_data = RSS::Rss.new("2.0")
end

# Defensive: treat a missing feed object the same as an empty feed.
existing_items = existing_data ? existing_data.items : []

# Build the new feed: the new item first, then up to MAX_TO_KEEP-1 old items
rss = RSS::Rss.new("2.0")
chan = RSS::Rss::Channel.new
chan.title = chan.description = "Commit Summary: #{repo}"
rss.channel = chan

# Read in the loginfo msg from standard input and highlight the headings
# (lines that start with a capital letter and end with a colon).
desc = ""
while line = STDIN.gets
  line.chomp!
  if line =~ /^[A-Z].*:\s*$/
    desc << "<p /><b>" << line << "</b>"
  else
    desc << line
  end
  desc << "<br />"
end

# new top item is current loginfo; sample the clock once so the title
# and the publication date always agree.
now = Time.now
item = RSS::Rss::Channel::Item.new
# NOTE(review): the timestamp and directory are joined without any
# separator - confirm that is the intended title format.
item.title = now.strftime("%b %d, %H:%M") + dir
item.pubDate = now
item.description = desc
chan.items << item

# Then up to MAX_TO_KEEP-1 items from old data
chan.items.concat existing_items[0, MAX_TO_KEEP-1]

File.open(FILENAME, "w") {|f| f.puts(rss.to_s) }



Listing Eight 



# Pull down the top 'n' articles from an RSS feed and summarize them for a web site

require 'open-uri'

require 'rss/0.9'

require 'rdoc/template'

require 'net/ftp'



# Local scratch file that receives the rendered HTML before upload.
TMP_FILE = "/tmp/topfive"

# Feed to summarize.
BLOG_URL = 'http://pragprog.com/pragdave/synopsis.rss?count=5'

# Template for the generated list: the START:entries / END:entries section
# is rendered once per entry, with %link%, %title% and %description%
# filled in from that entry's hash.
TEMPLATE = %{

<ul>

START:entries

<li><a href="%link%">%title%</a>%description%</li>

END:entries

</ul>

}

# Fetch the feed through the URI object rather than Kernel#open, so the
# string can only ever be treated as a URL.
URI.parse(BLOG_URL).open do |http|
  result = RSS::Parser.parse(http.read, false)

  # Convert an array of RSS items into an array of hashes
  entries = result.items.map do |item|
    {
      'title'       => item.title,
      'link'        => item.link,
      'description' => item.description
    }
  end

  # Render the entries through the template into the scratch file.
  File.open(TMP_FILE, "w") do |f|
    t = TemplatePage.new(TEMPLATE)
    t.write_html_on(f, 'entries' => entries)
  end
end

# Upload the rendered file to the web site.
Net::FTP.open('www.pragprog.com') do |ftp|
  ftp.login('username', 'password')
  ftp.chdir('portal/sideboxes')
  ftp.put(TMP_FILE, 'topfive', 1024)
end















4



