# Scrape The Current's "songs played" page into a list of song hashes.
#
# The code treats the <div>s under span.regular as alternating pairs:
# a time div followed by a song div whose HTML looks like
# "<a ...>Artist</a> - Title". Pairs that do not fit that shape are skipped.
#
# The final expression evaluates to an Array of
#   { :song => { :artist => String, :title => String, :start_time => String } }
#
# NOTE(review): opening a URL through Kernel#open relies on open-uri being
# loaded elsewhere; newer Rubies require URI.open instead -- confirm the
# target Ruby version before changing it.
items = []
open('http://minnesota.publicradio.org/radio/services/the_current/songs_played/') do |source|
  doc = Hpricot(source)
  divs = ((doc/"span.regular")/"div")

  # Step through the divs two at a time: divs[i] is the time, divs[i + 1] the song.
  (0...divs.size).step(2) do |i|
    time_div = divs[i]
    song_div = divs[i + 1]
    # An odd number of divs leaves a trailing time div with no song; skip it
    # instead of calling inner_html on nil.
    next unless song_div

    # Match once and reuse the captures. A div that is not a song row yields
    # nil here, which previously raised NoMethodError on `nil[1]`.
    song = song_div.inner_html.match(/<[Aa].+>(.+)<\/[Aa]>.?-.?(.+)/)
    next unless song

    artist = song[1]
    title = song[2]
    # TODO: need to parse out the date and am + pm from the page
    time = (time_div/"").inner_html

    # Capture group 1 is `.+`, so artist is never empty once the match succeeds.
    items << {:song => {:artist => artist, :title => title, :start_time => time}}
  end
end
items