1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
require 'yaml'
require 'nokogiri'
require 'open-uri'

# Fetch and parse the comment thread. `URI.open` is called explicitly because
# Ruby 3.0+ no longer routes Kernel#open to open-uri, so a bare `open(url)`
# would try to open a local file named after the URL.
file = Nokogiri::HTML(URI.open("http://lesswrong.com/lw/38u/best_career_models_for_doing_research/"))

# Ids of comments detected as deleted during this run.
deleted = []

# Build one hash per live comment; comments whose meta text says
# "comment deleted" are recorded in `deleted` and skipped (the block yields nil).
# NOTE(review): `c.css(...)` searches all descendants of the comment node; if
# replies are nested inside their parent's element, a deleted reply could make
# the parent match too — confirm against the page markup.
comments = file.css("#comments .comment").map do |c|
  id = c["id"].gsub(/thingrow_.._/, "")

  # check for deleted comments that were replied to...
  if c.css(".comment-meta").text =~ /comment deleted/i
    deleted << id
    next
  end

  { "id" => id,
    "author" => c.css(".author")[0].text,
    "time" => c.css(".comment-date")[0].text,
    "votes" => c.css(".votes")[0].text.to_i,
    "body" => c.css(".comment-content")[0].text }
end
# Drop the nil placeholders left by the skipped (deleted) comments.
comments = comments.compact

# check for deleted comments that weren't replied to...
# Snapshots are named comments-<unix time>.yml; pick the most recent timestamp.
last_saved = Dir.glob("comments-*")
                .map { |name| name.sub(/\Acomments-/, "").sub(/\.yml\z/, "").to_i }
                .max
if last_saved
  # SECURITY: the snapshot is stored as Ruby `inspect` output (despite the .yml
  # extension) and is read back with `eval`, which executes whatever the file
  # contains. Only run this against snapshot files this script wrote itself.
  # Kept as-is so previously saved snapshots remain readable.
  old_data = eval(File.read("comments-#{last_saved}.yml"))
  old_deleted = old_data[0]
  old_ids = old_data[1].map { |e| e["id"] }

  current_ids = comments.map { |e| e["id"] }
  # Union (not +): a comment that was live in the last snapshot and is now shown
  # as "comment deleted" is already in `deleted`, and would otherwise be added a
  # second time here and reported twice.
  deleted |= (old_ids - current_ids)
  new_deleted = deleted - old_deleted
else
  new_deleted = deleted
end

# Write the new snapshot; the block form closes (and flushes) the file handle.
File.open("comments-#{Time.now.to_i}.yml", "w") do |saved|
  saved.puts [deleted, comments].inspect
end

puts "Deleted comments: #{new_deleted.inspect}"