require 'yaml'
require 'nokogiri'
require 'open-uri'
# Snapshot the comments of a LessWrong thread and report which comments have
# been deleted since the previous snapshot.
#
# Each run writes "comments-<unix time>.yml" to the current directory. Despite
# the extension, the file holds the `inspect` form of a two-element Ruby array,
# [deleted_ids, comments], not YAML.
page_url = "http://lesswrong.com/lw/38u/best_career_models_for_doing_research/"

# URI#open (provided by open-uri) is used instead of Kernel#open, which no
# longer opens URLs on Ruby 3.0+. The block form closes the handle once the
# document has been parsed.
file = URI.parse(page_url).open { |io| Nokogiri::HTML(io) }

# Ids of comments the page itself marks as deleted.
deleted = []

# One hash per live comment; deleted placeholders yield nil and are dropped.
comments = file.css("#comments .comment").map do |c|
  id = c["id"].gsub(/thingrow_.._/, "")
  if c.css(".comment-meta").text =~ /comment deleted/i
    deleted << id
    next
  end
  { "id" => id,
    "author" => c.css(".author")[0].text,
    "time" => c.css(".comment-date")[0].text,
    "votes" => c.css(".votes")[0].text.to_i,
    "body" => c.css(".comment-content")[0].text }
end
comments = comments.compact

# Only files named exactly like our own snapshots count; an unrelated
# "comments-*" file must not be mistaken for a snapshot with timestamp 0.
snapshot_pattern = /\Acomments-(\d+)\.yml\z/
latest_snapshot = Dir.glob("comments-*.yml").
  select { |name| name =~ snapshot_pattern }.
  max_by { |name| name[snapshot_pattern, 1].to_i }

if latest_snapshot
  # SECURITY NOTE: snapshots are stored as Ruby `inspect` output, so reading
  # them back requires eval. This executes whatever is in the file; only run
  # this script in a directory whose snapshot files you trust. Kept as-is so
  # that existing snapshots remain readable.
  old_data = eval(File.read(latest_snapshot))
  old_deleted = old_data[0]
  old_ids = old_data[1].map { |e| e["id"] }
  current_ids = comments.map { |e| e["id"] }
  # Comments that were live last time but are no longer live now. Union (|)
  # rather than +, because a comment now rendered as "comment deleted" is
  # already in `deleted` and would otherwise be listed twice.
  deleted = deleted | (old_ids - current_ids)
  new_deleted = deleted - old_deleted
else
  # First run: everything currently marked deleted is new to us.
  new_deleted = deleted
end

# Block form guarantees the snapshot is flushed and closed.
File.open("comments-#{Time.now.to_i}.yml", "w") do |saved|
  saved.puts [deleted, comments].inspect
end
puts "Deleted comments: #{new_deleted.inspect}"