Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
# Scrapes the Ruby Quiz index at splatbang.com with scRUBYt!, downloads each
# selected quiz's description and summary text files, then post-processes the
# extracted XML with Nokogiri to embed those texts and writes the result to
# ruby_quiz_archive.xml in the current directory.
require 'rubygems'
require 'scrubyt'
require 'nokogiri'

#scrape the stuff with sRUBYt!
data = Scrubyt::Extractor.define do
  fetch 'http://splatbang.com/rubyquiz/'

  quiz "//div[@id='side']/ol/li/a[1]" do
    link_url do
      quiz_id(/id=(\d+)/)
      quiz_link(/id=(.+)/) do
        # Build the URL of the quiz description from the captured id and download it
        # (target name 'descriptions').
        quiz_desc_url lambda { |quiz_dir| "http://splatbang.com/rubyquiz/#{quiz_dir}/quiz.txt" }, :type => :script do
          quiz_dl 'descriptions', :type => :download
        end
        # Same for the quiz summary (target name 'summaries').
        quiz_summary_url lambda { |quiz_dir| "http://splatbang.com/rubyquiz/#{quiz_dir}/summ.txt" }, :type => :script do
          quiz_dl 'summaries', :type => :download
        end
      end
    end

    quiz_detail :resolve => "http://splatbang.com/rubyquiz" do
      solution "/html/body/div/div[2]/ol/li/a" do
        # The link text is passed through unchanged as the author.
        author lambda { |solution_link_text| solution_link_text }, :type => :script
        ruby_talk_reference "href", :type => :attribute
        solution_detail :resolve => :full do
          text "//pre[1]"
        end
      end
    end
  end.select_indices(0..2) # only the first three matched quiz links
end

#post process with Nokogiri
result = Nokogiri::XML(data.to_xml)

(result / "//quiz").each do |quiz|
  # First whitespace-delimited run of digits in the quiz text; 0 when none is found.
  quiz_id = quiz.text[/\s(\d+)\s/, 1].to_i

  # Ids above 157 read from files suffixed "_<id - 157>"; others use the bare name.
  # NOTE(review): presumably mirrors how the download step names its files - confirm.
  file_index = quiz_id > 157 ? "_#{quiz_id - 157}" : ""

  # NOTE(review): the XPath starts with "//", so it may match against the whole
  # document rather than only this quiz - confirm before changing.
  (quiz / "//link_url").first.unlink

  # File.read opens, reads and closes the file; the previous open(...).read left
  # one unclosed handle per file until garbage collection.
  desc = Nokogiri::XML::Element.new("description", quiz.document)
  desc.content = File.read("descriptions/quiz#{file_index}.txt")
  quiz.add_child(desc)

  summary = Nokogiri::XML::Element.new("summary", quiz.document)
  summary.content = File.read("summaries/summ#{file_index}.txt")
  quiz.add_child(summary)
end

File.open("ruby_quiz_archive.xml", "w") { |f| f.write(result) }
This paste will be private.
From the Design Piracy series on my blog: