Report abuse

" unless current_score.nil? current_score = buddy.score out << "

			
# twitter_friends_of_friends.rb
# by Greg Houston
# blog: http://ghouston.blogspot.com
# wiki: http://ghouston.wiki.zoho.com/
#
# this script is posted online at http://pastie.caboo.se/195011
# 
# special thanks to Yoan Blanc 
# http://yoan.dosimple.ch/blog/2007/05/17/
# which put this idea in my head.  this code is based on his script http://yoan.dosimple.ch/blog/2007/05/17/graph2.rb
#
# twitter_friends_of_friends.rb looks at all the followers and follower's followers
# (e.g. 2nd level deep) to produce a Long Tail report.
#
# the report is output in an HTML fragment which I plan to post
# on my wiki. 
#
# usage:  ./ruby twitter_friends_of_friends.rb nick
# where nick is the starting nick.
# the results are written to twitter_friends_of_friends.html
# 
# notes:
#   hCard.find( @url ) returns an empty collection if there
#   was an error page received from Twitter.
#
#  the script uses a cache folder to store the downloaded html. it also
#  saves the yaml for each Twitt after parsing the html.  If this script is
#  run multiple times, it will use the cache to avoid extra work.  This makes
#  things go much faster while developing this script.  You will have to
#  clear the cache directory if you want to get the latest from Twitter.
#  
#  getting the img_url can be slow (especially since this is single threaded),
#  so generating the output can take a long time.  please be patient.  If you
#  want to speed things up, look at the lines near the end which follow #NOTE:
#  comments.  You can change between the Fast option which doesn't get img_urls
#  for friends of friends.  Or the Slow option which provides a nicer output.
#
require 'rubygems'

require 'cgi'
require 'fileutils'
require 'pp'
require 'singleton'
require 'yaml'

require 'hpricot'
require 'mofo'
require 'rio'

# Prints +msg+ on standard output and flushes right away, so progress
# messages show up while the (slow) crawl is still running.
def say( msg )
  $stdout.puts( msg )
  STDOUT.flush
end

# Make sure the cache directory (downloaded html + per-nick yaml) exists
# before anything tries to read from or write to it.
FileUtils.mkdir_p( 'cache' )

# Process-wide registry (singleton) of every Twitt seen so far, keyed by nick.
#
# Looking a nick up through TwittSpace guarantees that the same Twitt object
# is returned every time, so scores accumulate on one shared instance.
class TwittSpace
  include Singleton

  # Hash of nick => Twitt for everything registered so far.
  attr_reader :everyone

  # Number of Twitts registered so far.
  def self.size
    instance.everyone.size
  end

  # Convenience wrapper around TwittSpace.instance.get.
  def self.get( nick )
    instance.get( nick )
  end

  # Convenience wrapper around TwittSpace.instance.everyone.
  def self.everyone
    instance.everyone
  end

  def initialize
    @everyone = {}
  end

  # Returns the Twitt for +nick+, creating and registering it on first use.
  #
  # If cache/<nick>.yaml exists the Twitt is restored from it; otherwise a
  # fresh, not-yet-loaded Twitt is created.
  def get( nick )
    return @everyone[nick] if @everyone.key?( nick )

    yaml_file_path = 'cache/' + nick + '.yaml'
    if File.exist?( yaml_file_path )
      say "  loading #{yaml_file_path} (#{TwittSpace.size})"
      # Block form closes the file handle as soon as parsing is done.
      # NOTE(review): the cache is written by this script itself, so it is
      # treated as trusted. On Psych >= 4, YAML.load is a safe load and may
      # refuse to rebuild Twitt objects -- confirm whether unsafe_load or
      # permitted_classes is needed on the target Ruby.
      twitt = File.open( yaml_file_path ) { |io| YAML::load( io ) }
    else
      twitt = Twitt.new( nick )
    end
    @everyone[twitt.nick] = twitt
    twitt
  end
end

# One Twitter account, identified by its nick.
#
# The basic attributes (image url, full name, address, hCard contacts) are
# loaded lazily from the cached or freshly downloaded profile page. The map of
# followed accounts is loaded lazily in a separate step (see #load_following).
class Twitt
  # Instance variables that are left out of the YAML cache.
  UNCACHED_IVARS = %w[@following @score].freeze

  attr_writer :score
  # NOTE(review): nothing in this class assigns @loaded (only @loaded_base and
  # @loaded_following are set), so #loaded looks like it is always nil -- confirm.
  attr_reader :url, :nick, :loaded, :address, :full_name

  def initialize( nick )
    @nick = nick
    @score = 0
    @url = 'http://twitter.com/' + @nick
  end

  # Current score. @score is not written to the YAML cache, so an instance
  # restored from the cache starts without it; treat that as 0 so that
  # `twitt.score += 1` keeps working.
  def score
    @score ||= 0
  end

  # Fetches the profile page, trying up to three times. A raised error or a
  # body containing "Status: 500 Internal Server Error" counts as a failed
  # attempt. A non-empty page is saved to +cache_file_path+.
  #
  # Returns the page text, or '' if every attempt failed.
  def download( cache_file_path )
    page = ''
    3.times do
      begin
        page = rio( @url ).read
      rescue
        page = ''
        next
      end
      break unless page =~ /Status: 500 Internal Server Error/
      page = ''
    end

    rio( cache_file_path ) < page unless page.empty?
    page
  end

  # Names of the instance variables to serialize, sorted, without the ones in
  # UNCACHED_IVARS. Names are compared as strings because instance_variables
  # returns Strings on Ruby 1.8 and Symbols on later versions.
  def to_yaml_properties
    instance_variables.reject { |ivar| UNCACHED_IVARS.include?( ivar.to_s ) }.sort
  end

  # Lazily loads the basic attributes (contacts, image url, full name,
  # address) from the cached html, downloading the page if it is not cached.
  # Does nothing after the first call.
  def load
    return if @loaded_base
    @loaded_base = true

    cache_file_path = 'cache/'+@nick+'.html'
    if File.exist?( cache_file_path )
      say "  loading #{@nick}.html (#{TwittSpace.size})"
      page = rio( cache_file_path ).read
    else
      say "  downloading #{@nick} (#{TwittSpace.size})"
      page = download( cache_file_path )
    end

    @contacts = hCard.find( :text => page )
    @contacts = [@contacts] unless @contacts.instance_of?( Array )

    doc = Hpricot( page )
    # The profile image is missing when the homepage could not be loaded;
    # report it and carry on instead of failing on nil.
    profile_image = (doc/"h2//img[@id='profile-image']")[0]
    if profile_image
      @img_url = profile_image.attributes['src']
    else
      say "    ERROR: UNABLE TO LOAD TWITTER HOMEPAGE"
      @img_url = nil
    end
    @full_name = (doc/"span[@class='fn']").inner_html
    @address = (doc/"span[@class='adr']").inner_html

    # An empty page means the download failed. #download does not cache that
    # as html, so do not pin the failure in the yaml cache either.
    rio( 'cache/'+@nick+'.yaml' ) < self.to_yaml unless page.empty?
  end

  # Lazily builds @following (nick => Twitt) from the hCard contacts.
  #
  # This attribute is not included in the yaml cache; it has to be loaded
  # separately from the basic attributes to avoid getting into recursion.
  # Does nothing after the first call.
  def load_following
    return if @loaded_following
    @loaded_following = true

    @following = Hash.new
    # Go through #contacts so the basic attributes are loaded first even when
    # this method is called directly.
    contacts.each do |contact|
      next unless contact.url =~ %r{^http://twitter\.com/.*} ||
                  contact.url =~ %r{http://explore\.twitter\.com/.*}
      nick = contact.url.slice( /[^\/]*$/ ) # last path segment of the url
      @following[nick] = TwittSpace.get(nick)
    end
  end

  # hCard contacts found on the profile page (loads the page on first use).
  def contacts
    self.load
    @contacts
  end

  # Profile image url, or nil when none was found (loads the page on first use).
  def img_url
    self.load
    @img_url
  end

  # Hash of nick => Twitt for every followed account (loads on first use).
  def following
    self.load
    self.load_following
    @following
  end
end

# Script entry point: walk two levels of the "following" graph starting at the
# nick given on the command line, add one point to a nick every time it shows
# up in the following list of a level 1 or level 2 account, and write the
# ranking to twitter_friends_of_friends.html.
if __FILE__==$0
  say "starting..."
  root_nick = ARGV[0] || 'neversleep360'
  root = TwittSpace.get( root_nick )

  level1 = root.following # load level 1

  say "scoring friends..."
  level2 = Hash.new
  level1.each do | nick, twitt |
    say "  #{nick}"
    twitt.following.each do | nick2, twitt2 |
      twitt2.score += 1
      # level 2 holds only nicks that are not already in level 1
      level2[nick2]=twitt2 unless level2.include?(nick2) || level1.include?(nick2)
    end
  end

  say "scoring friends of friends..."
  level2.each do | nick, twitt |
    say "  #{nick}"
    twitt.following.each do | nick2, twitt2 |
      twitt2.score += 1
    end
  end

  say "\n\ncalculating rank..."
  ranking = level1.values + level2.values
  say "  #{ranking.size} nicks"
  # highest score first; ties are ordered by nick, ignoring case
  ranking.sort! do |a,b|
    by_score = b.score <=> a.score
    (by_score == 0) ? a.nick.casecmp( b.nick ) : by_score
  end

  say "\n\nreporting results..."
  # NOTE(review): the HTML markup inside the string literals of this section
  # was missing from the available copy of the script. The statement order
  # follows the surviving fragments; the markup itself (one heading plus one
  # table per score, six cells per row) is a reconstruction -- adjust to taste.
  escape = lambda { |text| CGI.escapeHTML( text.to_s ) } # values come from scraped pages
  out = rio( "twitter_friends_of_friends.html" )
  out < "" # start from an empty report file
  current_score = nil
  column = 0
  ranking.each do |buddy|
    if current_score != buddy.score
      # new score group: close the previous table (if any) and open the next
      out << "</tr>\n</table>\n" unless current_score.nil?
      current_score = buddy.score
      out << "<h3>#{buddy.score}</h3>\n<table>\n<tr>\n"
      column = 1
    end
    if column > 6
      out << "</tr>\n<tr>\n"
      column = 1
    end
    #NOTE: fast... but not all images
    # NOTE(review): Twitt never assigns @loaded in the visible code, so this
    # variant would currently leave out every image -- confirm before using it.
    #image = (buddy.loaded) ? "<img src=\"#{escape.call( buddy.img_url )}\" alt=\"#{escape.call( buddy.nick )}\"/><br/>" : ""
    #NOTE: slow... has to load each buddy to get img_url
    image = "<img src=\"#{escape.call( buddy.img_url )}\" alt=\"#{escape.call( buddy.nick )}\"/><br/>"
    out << "<td>#{image}<a href=\"#{escape.call( buddy.url )}\">#{escape.call( buddy.nick )}</a></td>\n"
    column += 1
  end
  # close the last table, unless nothing was reported at all
  out << "</tr>\n</table>\n" unless current_score.nil?
end