Report abuse


			
require 'rubygems'
require 'grit'

# Ruby 1.8 switch that makes the interpreter treat strings as UTF-8 (the alias
# table in this script contains non-ASCII names). Later Ruby versions ignore
# the assignment and only emit a warning.
$KCODE = 'u'

# Path of the local Rails checkout whose history is analysed.
RAILS_DIR = '/Users/fxn/prj/rails'

# Cache of added CHANGELOG lines keyed by commit id, persisted between runs in
# the file 'changelogs' of the current directory.
#
# The file is read in binary mode, matching the 'wb' used to write it, and
# through a block so the handle is closed right away. A missing or unreadable
# cache deliberately falls back to an empty hash.
#
# NOTE: Marshal.load must only be fed trusted data; this file is expected to be
# the one written by the END hook below.
$changelogs = begin
  File.open('changelogs', 'rb') { |f| Marshal.load(f) }
rescue StandardError
  {}
end

# Persist the (possibly grown) cache when the script exits.
END { File.open('changelogs', 'wb') {|f| Marshal.dump($changelogs, f)} }

# Builds an email address out of its two halves. Keeping the halves apart in
# the source lets this script be published without literal addresses in it.
def email(user, domain)
  "#{user}@#{domain}"
end

# Some people appear in Rails logs under different names: there are nicks,
# typos, email addresses, shortenings, etc. This hand-made table maps each
# canonical author name (the key) to the alternative spelling(s) seen in the
# logs (the value: either a single string or an array of strings), so that
# commits from the same real author can be aggregated.
#
# Some values are whole phrases rather than names, because svn-era authors are
# taken verbatim from whatever text appears between brackets in a message.
#
# Addresses are built with email() so that they are not written literally here.
SEEN_IN_LOG_ALSO_AS = {
  'Aliaksey Kandratsenka'    => 'Aleksey Kondratenko',
  'Andrew Kaspick'           => [email('akaspick', 'gmail.com'), 'akaspick'],
  'Anthony Eden'             => 'aeden',
  'Austin Ziegler'           => 'Thanks to Austin Ziegler for Transaction::Simple',
  'Blaine'                   => ['blaine', email('blaine', 'odeo.com')],
  'Bob Silva'                => 'BobSilva',
  'Brad Greenlee'            => 'bgreenlee',
  'Cheah Chu Yeow'           => ['Chu Yeow', 'chuyeow'],
  'court3nay'                => ['courtenay', email('court3nay', 'gmail.com')],
  'Chris McGrath'            => [email('c.r.mcgrath', 'gmail.com'), 'c.r.mcgrath', email('chris', 'octopod.info'), 'octopod'],
  'Chris Roos'               => 'chrisroos',
  'Damian Janowski'          => 'djanowski',
  'Dan Manges'               => 'dcmanges',
  'Daniel Morrison'          => 'danielmorrison',
  'Daniel Von Fange'         => ['Suggested by Daniel Von Fange', 'Spotted by Daniel Von Fange'],
  'Dave Thomas'              => [email('dave', 'pragprog.com'), 'After much pestering from Dave Thomas'],
  'David Heinemeier Hansson' => 'DHH',
  'Don Park'                 => email('don.park', 'gmail.com'),
  'Dreamer3'                 => email('dreamer3', 'gmail.com'),
  'Duncan Beevers'           => 'duncanbeevers',
  'Eloy Duran'               => 'alloy',
  'Emilio Tagua'             => 'miloops',
  'Ernesto Jimenez'          => 'ernesto.jimenez',
  'Frederick Cheung'         => ['fcheung', 'Fred Cheung', 'frederick.cheung', email('frederick.cheung', 'gmail.com')],
  'Geoff Buesing'            => ['gbuesing', 'Geoffrey Buesing'],
  'Geoff Garside'            => 'ggarside',
  'Grant Hollingworth'       => email('grant', 'antiflux.org'),
  'Hampton Catlin'           => email('hcatlin', 'gmail.com'),
  'Hongli Lai (Phusion)'     => ['Hongli Lai (Phusion', 'FooBarWidget'],
  'Ian White'                => email('ian.w.white', 'gmail.com'),
  'Isaac Feliu'              => 'isaacfeliu',
  'Jack Danger Canty'        => %w(danger Danger),
  'Jarkko Laine'             => [email('jarkko', 'jlaine.net'), 'Jarkko', 'jarkko'],
  'James Adam'               => 'lazyatom', 
  'James Mead'               => 'floehopper',
  'Jeremy Evans'             => [email('jeremyevans0', 'gmail.com'), 'jeremyevans'],
  'Jeremy Kemper'            => 'bitsweat',
  'Jeremy McAnally'          => ['jeremymcnally', 'jeremymcanally'],
  'John Barnette'            => 'jbarnette',
  'Jonathan del Strother'    => ['Catfish', 'catfish'],
  'Jonathan Weiss'           => 'jweiss',
  'Jordi Bunster'            => 'jordi',
  'José Valim'               => 'josevalim',
  'Josh Knowles'             => email('joshknowles', 'gmail.com'),
  'Josh Peek'                => ['josh', 'Josh', 'Joshua Peek', 'joshpeek', email('josh', 'joshpeek.com')],
  'Josh Starcher'            => email('josh.starcher', 'gmail.com'),
  'Josh Susser'              => ['hasmanyjosh', email('josh', 'hasmanythrough.com')],
  'Juanjo Bazan'             => 'juanjo.bazan',
  'Justin French'            => 'justinfrench',
  'Kamal Fariz Mahyuddin'    => 'kamal',
  'Ken Barker'               => email('ken.barker', 'gmail.com'),
  'Ken Miller'               => email('kenneth.miller', 'bitfield.net'),
  'Kevin Clark'              => "Kevin Clark #{email('kevin.clark', 'gmail.com')}",
  'Luke Redpath'             => email('contact', 'lukeredpath.co.uk'),
  'Manfred Stienstra'        => [email('m.stienstra', 'fngtps.com'), 'manfred'],
  'Marcel Molina'            => ['Marcel Molina Jr.', 'Marcel'],
  'Mark Somerville'          => 'Spakman',
  'Michael Klishin'          => 'Michael S. Klishin',
  'Michael Koziarski'        => %w(Koz nzkoz),
  'Michael Schoen'           => 'Michael A. Schoen',
  'Michael Schubert'         => [email('michael', 'schubert'), email('michael', 'schubert.cx')],
  'Michael Schuerig'         => [email('michael', 'schuerig.de'), 'Michael Shuerig'],
  'Mike Naberezny'           => 'mnaberez',
  'Mikel Lindsaar'           => ['mikel', 'raasdnil'],
  'Mislav Marohnić'          => ['mislav', 'mislaw', email('mislav', 'nippur.irb.hr')],
  'Nathan Weizenbaum'        => 'Nex3',
  'Nick Sieger'              => ['nicksieger', email('nicksieger', 'gmail.com')],
  'Nik Wakelin'              => 'nik.wakelin',
  'Obie Fernandez'           => 'ObieFernandez',
  'Philip Hallstrom'         => 'phallstrom',
  'Pratik Naik'              => %w(Pratik lifofifo lifo),
  'Rick Olson'               => ['rick', 'Rick', 'Rick Olsen', email('technoweenie', 'gmail.com')],
  'Rob Biedenharn'           => ['rabiedenharn', email('Rob', 'AgileConsultingLLC.com')],
  'Robby Russell'            => 'robbyrussell',
  'Ryan Bates'               => 'ryanb',
  'Ryan Davis'               => 'zenspider',
  'Ryan Tomayko'             => email('rtomayko', 'gmail.com'),
  'Sam Granieri'             => 'sjgman9',
  'Sebastian Kanthak'        => [email('sebastian.kanthak', 'muehlheim.de'), 'sebastian.kanthak', 'skanthak'],
  'Seth Rasmussen'           => 'loincloth',
  'Stefan Kaes'              => [email('skaes', 'web.de'), 'skaes', 'Stephan Kaes', 'Skaes', 'skaes.web.de'],
  'Steve Purcell'            => email('stephen_purcell', 'yahoo.com'),
  'Tarmo Tänav'              => ['tarmo', 'tarmo_t', 'Tarmo Täna'],
  'Thijs van der Vossen'     => ['thijsv', email('thijs', 'vandervossen.net'), email('thijs', 'fngtps.com')],
  'Tim Pope'                 => 'tpope',
  'Tobias Lütke'             => ['Tobias Luetke', 'TobiasLuetke'],
  'Tom Ward'                 => ['Tom ward', 'tomafro'],
  'Will Bryant'              => 'will.bryant',
  'Xavier Noria'             => 'fxn',
  'Zach Dennis'              => 'zdennis',
}

# NAME_NORMALIZER is SEEN_IN_LOG_ALSO_AS turned inside out: every alternative
# spelling becomes a key that points at the canonical author name.
NAME_NORMALIZER = {}
SEEN_IN_LOG_ALSO_AS.each_pair do |canonical, aliases|
  # A value may be a single string or an array of strings.
  Array(aliases).each do |alias_name|
    NAME_NORMALIZER[alias_name] = canonical
  end
end

# Returns the canonical spelling of an author name.
#
# The first "<...>" chunk (an email address in angles), together with the
# whitespace before it, is dropped and the result is stripped. If that cleaned
# name is a known alias in NAME_NORMALIZER the canonical name is returned,
# otherwise the cleaned name itself. The argument is not modified.
def normalize_name(name)
  cleaned = name.sub(/\s*<[^>]+>/, '').strip
  NAME_NORMALIZER[cleaned] || cleaned
end

# Tells whether the commit message contains the 'git-svn-id:' marker, which
# this script takes as the sign of a commit that came from svn.
def commit_from_svn?(commit)
  log_message = commit.message
  log_message.include?('git-svn-id:')
end

# In the svn days of Rails there was a convention for giving credit: the
# names of the authors went between brackets at the end of the commit or
# changelog message. For example:
#
#   Fix case-sensitive validates_uniqueness_of. Closes #11366 [miloops]
#
# Several names may share the brackets, separated by ",", "/" or "&":
#
#   [Adam Milligan, Pratik]
#   [Rick Olson/Nicholas Seckar]
#   [Kevin Clark & Jeremy Hopple]
#
# This is not robust, but it is the best that can be done. Returns the array
# of bracketed pieces accepted by looks_like_an_author_name, which is empty
# when no line of the message ends in a bracketed chunk.
def extract_svn_authors_from_message(message)
  # "$" is an end-of-line anchor on purpose: the brackets may close any line.
  bracketed = /\[([^\]]+)\]\s*$/.match(message)
  return [] unless bracketed

  pieces = bracketed[1].split(%r{\s*[,/&]\s*})
  pieces.select { |piece| looks_like_an_author_name(piece) }
end

# Returns the normalized, duplicate-free list of authors of a svn commit.
#
# Sources are tried in order until one yields something: the brackets in the
# commit message, then the CHANGELOG entries added by the commit, and finally
# the author name recorded in the commit itself.
def extract_svn_authors(commit)
  names = extract_svn_authors_from_message(commit.message)
  names = extract_svn_authors_diffing(commit) if names.empty?
  names = [commit.author.name] if names.empty?
  names.map { |raw_name| normalize_name(raw_name) }.uniq
end

# Searches for author names in the CHANGELOG entries a commit adds.
#
# The added CHANGELOG lines are looked up in $changelogs by commit id. On a
# miss they are collected from the output of git_show(commit), run from
# RAILS_DIR, and then stored in the cache. A line is collected when it is an
# added line that starts a bullet ("+" followed by optional blanks and "*")
# and belongs to a file whose "+++" header ends in "changelog" (any case).
#
# Returns a flat array with the names extract_svn_authors_from_message finds
# in those lines; it may be empty.
def extract_svn_authors_diffing(commit)
  unless $changelogs.has_key?(commit.id)
    added_entries = []
    Dir.chdir(RAILS_DIR) do
      in_changelog = false
      # String#each only exists in Ruby 1.8; each_line walks the output line
      # by line on every Ruby version.
      git_show(commit).each_line do |line|
        if line =~ /^diff --git/
          # A new file starts: it is not a changelog until its header says so.
          in_changelog = false
          next
        end
        if line =~ /^\+\+\+.*changelog$/i
          in_changelog = true
          next
        end
        if in_changelog && line =~ /^\+\s*\*/
          added_entries << line
        end
      end
    end
    # Cache only once the diff has been fully read, so that a failure above
    # does not leave an empty entry behind for this commit.
    $changelogs[commit.id] = added_entries
  end
  $changelogs[commit.id].map {|line| extract_svn_authors_from_message(line)}.flatten
end

# Runs `git show` for the id of the given commit in the current working
# directory and returns what the command wrote to standard output.
def git_show(commit)
  %x(git show #{commit.id})
end

# Extracting author names from svn commits yields a few strings that are not
# names at all. Returns false for those known offenders and true otherwise.
def looks_like_an_author_name(str)
  return false if str =~ /\A\d+\z/ # side effect of changesets like [5684]
  return false if str =~ /\A\s*\z/ # nothing but whitespace
  return false if str == 'See rails ML'
  return false if str == 'subject "Text::Format Licence Exception" on Oct 15'
  return false if str =~ /RubyConf/ # example: RubyConf '05
  # example: Includes duplicates of changes from 1.1.4 - 1.2.3
  return false if str =~ /^Includes duplicates of changes/

  true
end

# Returns a copy of +name+ in which the "@" of anything that looks like an
# email address (word characters and dots on both sides) reads " ~ at ~ ".
def with_munged_email_addresses(name)
  address = /([\w.]+)@([\w.]+)/
  munged = "\\1 ~ at ~ \\2"
  name.gsub(address, munged)
end

# Prints one "count name" line per author to standard output, highest count
# first, with email addresses munged by with_munged_email_addresses.
#
# authors - collection of name/count pairs (the script passes a Hash from
#           name to number of commits).
#
# Authors with the same count are listed alphabetically, so the report does
# not depend on hash iteration order or on the sort algorithm.
def print_authors(authors)
  ranked = authors.sort do |(name_a, count_a), (name_b, count_b)|
    by_count = count_b <=> count_a
    by_count == 0 ? name_a <=> name_b : by_count
  end
  ranked.each do |name, count|
    puts "%4d %s" % [count, with_munged_email_addresses(name)]
  end
end

# Walks the 'master' branch of the repository at RAILS_DIR in pages of 100
# commits, counts commits per normalized author name, and prints the result
# with print_authors.
#
# Commits that came from svn are credited to every author that
# extract_svn_authors returns. Any other commit is credited to the author
# recorded in the commit, unless that name is rejected by
# looks_like_an_author_name.
def process_commits!
  tally = Hash.new(0)
  repo = Grit::Repo.new(RAILS_DIR)
  offset = 0
  per_page = 100

  loop do
    page = repo.commits('master', per_page, offset)
    break if page.empty?

    page.each do |commit|
      credited =
        if commit_from_svn?(commit)
          extract_svn_authors(commit)
        else
          recorded_name = commit.author.name
          looks_like_an_author_name(recorded_name) ? [recorded_name] : []
        end
      credited.each { |credited_name| tally[normalize_name(credited_name)] += 1 }
      # The offset of the next page is the number of commits seen so far.
      offset += 1
    end
  end

  print_authors(tally)
end

# Script entry point: count the commits per author and print the report.
process_commits!