|
|
require 'rubygems'
require 'grit'
# $KCODE only has an effect on Ruby 1.8; later versions ignore it (with a
# warning), so the assignment is skipped there.
$KCODE = 'u' if RUBY_VERSION < '1.9'

# Working copy of the Rails repository to analyze.
RAILS_DIR = '/Users/fxn/prj/rails'

# Cache of CHANGELOG lines added per commit, keyed by commit id. It is kept in
# the file 'changelogs' (in the current directory) between runs.
#
# The file is read in binary mode, matching the 'wb' used when writing, and
# through a block so the handle is always closed. Loading is best-effort: a
# missing or unreadable cache just means starting from scratch.
#
# NOTE(review): Marshal.load must only ever see this script's own cache file;
# never point it at data from an untrusted source.
$changelogs = begin
  File.open('changelogs', 'rb') { |f| Marshal.load(f) }
rescue StandardError
  {}
end

# Persist the cache when the script exits.
at_exit { File.open('changelogs', 'wb') { |f| Marshal.dump($changelogs, f) } }
# Assembles an email address from its two halves. Simple trick that lets this
# script be posted with addresses that stay readable, without ever writing
# one out as a literal.
#
# user   - the part before the at sign.
# domain - the part after the at sign.
#
# Returns the joined address as a new String.
def email(user, domain)
  "#{user}@#{domain}"
end
# Some people appear in the Rails logs under several different names: nicks,
# typos, email addresses, shortenings, etc. This is a hand-made list that maps
# each canonical author name (the key) to the other spellings seen for that
# person (the value), so that commits from the same real author can be
# aggregated.
#
# A value is either a single String or an Array of Strings. Email addresses
# are built with email() rather than written as literals.
SEEN_IN_LOG_ALSO_AS = {
'Aliaksey Kandratsenka' => 'Aleksey Kondratenko',
'Andrew Kaspick' => [email('akaspick', 'gmail.com'), 'akaspick'],
'Anthony Eden' => 'aeden',
'Austin Ziegler' => 'Thanks to Austin Ziegler for Transaction::Simple',
'Blaine' => ['blaine', email('blaine', 'odeo.com')],
'Bob Silva' => 'BobSilva',
'Brad Greenlee' => 'bgreenlee',
'Cheah Chu Yeow' => ['Chu Yeow', 'chuyeow'],
'court3nay' => ['courtenay', email('court3nay', 'gmail.com')],
'Chris McGrath' => [email('c.r.mcgrath', 'gmail.com'), 'c.r.mcgrath', email('chris', 'octopod.info'), 'octopod'],
'Chris Roos' => 'chrisroos',
'Damian Janowski' => 'djanowski',
'Dan Manges' => 'dcmanges',
'Daniel Morrison' => 'danielmorrison',
'Daniel Von Fange' => ['Suggested by Daniel Von Fange', 'Spotted by Daniel Von Fange'],
'Dave Thomas' => [email('dave', 'pragprog.com'), 'After much pestering from Dave Thomas'],
'David Heinemeier Hansson' => 'DHH',
'Don Park' => email('don.park', 'gmail.com'),
'Dreamer3' => email('dreamer3', 'gmail.com'),
'Duncan Beevers' => 'duncanbeevers',
'Eloy Duran' => 'alloy',
'Emilio Tagua' => 'miloops',
'Ernesto Jimenez' => 'ernesto.jimenez',
'Frederick Cheung' => ['fcheung', 'Fred Cheung', 'frederick.cheung', email('frederick.cheung', 'gmail.com')],
'Geoff Buesing' => ['gbuesing', 'Geoffrey Buesing'],
'Geoff Garside' => 'ggarside',
'Grant Hollingworth' => email('grant', 'antiflux.org'),
'Hampton Catlin' => email('hcatlin', 'gmail.com'),
'Hongli Lai (Phusion)' => ['Hongli Lai (Phusion', 'FooBarWidget'],
'Ian White' => email('ian.w.white', 'gmail.com'),
'Isaac Feliu' => 'isaacfeliu',
'Jack Danger Canty' => %w(danger Danger),
'Jarkko Laine' => [email('jarkko', 'jlaine.net'), 'Jarkko', 'jarkko'],
'James Adam' => 'lazyatom',
'James Mead' => 'floehopper',
'Jeremy Evans' => [email('jeremyevans0', 'gmail.com'), 'jeremyevans'],
'Jeremy Kemper' => 'bitsweat',
'Jeremy McAnally' => ['jeremymcnally', 'jeremymcanally'],
'John Barnette' => 'jbarnette',
'Jonathan del Strother' => ['Catfish', 'catfish'],
'Jonathan Weiss' => 'jweiss',
'Jordi Bunster' => 'jordi',
'José Valim' => 'josevalim',
'Josh Knowles' => email('joshknowles', 'gmail.com'),
'Josh Peek' => ['josh', 'Josh', 'Joshua Peek', 'joshpeek', email('josh', 'joshpeek.com')],
'Josh Starcher' => email('josh.starcher', 'gmail.com'),
'Josh Susser' => ['hasmanyjosh', email('josh', 'hasmanythrough.com')],
'Juanjo Bazan' => 'juanjo.bazan',
'Justin French' => 'justinfrench',
'Kamal Fariz Mahyuddin' => 'kamal',
'Ken Barker' => email('ken.barker', 'gmail.com'),
'Ken Miller' => email('kenneth.miller', 'bitfield.net'),
'Kevin Clark' => "Kevin Clark #{email('kevin.clark', 'gmail.com')}",
'Luke Redpath' => email('contact', 'lukeredpath.co.uk'),
'Manfred Stienstra' => [email('m.stienstra', 'fngtps.com'), 'manfred'],
'Marcel Molina' => ['Marcel Molina Jr.', 'Marcel'],
'Mark Somerville' => 'Spakman',
'Michael Klishin' => 'Michael S. Klishin',
'Michael Koziarski' => %w(Koz nzkoz),
'Michael Schoen' => 'Michael A. Schoen',
'Michael Schubert' => [email('michael', 'schubert'), email('michael', 'schubert.cx')],
'Michael Schuerig' => [email('michael', 'schuerig.de'), 'Michael Shuerig'],
'Mike Naberezny' => 'mnaberez',
'Mikel Lindsaar' => ['mikel', 'raasdnil'],
'Mislav Marohnić' => ['mislav', 'mislaw', email('mislav', 'nippur.irb.hr')],
'Nathan Weizenbaum' => 'Nex3',
'Nick Sieger' => ['nicksieger', email('nicksieger', 'gmail.com')],
'Nik Wakelin' => 'nik.wakelin',
'Obie Fernandez' => 'ObieFernandez',
'Philip Hallstrom' => 'phallstrom',
'Pratik Naik' => %w(Pratik lifofifo lifo),
'Rick Olson' => ['rick', 'Rick', 'Rick Olsen', email('technoweenie', 'gmail.com')],
'Rob Biedenharn' => ['rabiedenharn', email('Rob', 'AgileConsultingLLC.com')],
'Robby Russell' => 'robbyrussell',
'Ryan Bates' => 'ryanb',
'Ryan Davis' => 'zenspider',
'Ryan Tomayko' => email('rtomayko', 'gmail.com'),
'Sam Granieri' => 'sjgman9',
'Sebastian Kanthak' => [email('sebastian.kanthak', 'muehlheim.de'), 'sebastian.kanthak', 'skanthak'],
'Seth Rasmussen' => 'loincloth',
'Stefan Kaes' => [email('skaes', 'web.de'), 'skaes', 'Stephan Kaes', 'Skaes', 'skaes.web.de'],
'Steve Purcell' => email('stephen_purcell', 'yahoo.com'),
'Tarmo Tänav' => ['tarmo', 'tarmo_t', 'Tarmo Täna'],
'Thijs van der Vossen' => ['thijsv', email('thijs', 'vandervossen.net'), email('thijs', 'fngtps.com')],
'Tim Pope' => 'tpope',
'Tobias Lütke' => ['Tobias Luetke', 'TobiasLuetke'],
'Tom Ward' => ['Tom ward', 'tomafro'],
'Will Bryant' => 'will.bryant',
'Xavier Noria' => 'fxn',
'Zach Dennis' => 'zdennis',
}
# Inverted view of SEEN_IN_LOG_ALSO_AS: every alternative spelling becomes a
# key whose value is the canonical author name it stands for.
NAME_NORMALIZER = {}
SEEN_IN_LOG_ALSO_AS.each_pair do |canonical, aliases|
  # An entry holds either one alias or a list of them; treat both as a list.
  Array(aliases).each do |alias_name|
    NAME_NORMALIZER[alias_name] = canonical
  end
end
# Maps a name as found in the logs to the canonical author name.
#
# The first email address written between angle brackets (plus the whitespace
# before it) is dropped and surrounding whitespace is trimmed; the result is
# then looked up in NAME_NORMALIZER.
#
# name - the raw author String; it is not modified.
#
# Returns the canonical name if the cleaned name is a known alias, otherwise
# the cleaned name itself.
def normalize_name(name)
  cleaned = name.sub(/\s*<[^>]+>/, '').strip
  canonical = NAME_NORMALIZER[cleaned]
  canonical || cleaned
end
# Tells whether a commit carries the 'git-svn-id:' marker in its message.
#
# commit - an object responding to #message.
#
# Returns true or false.
def commit_from_svn?(commit)
  log_text = commit.message
  !log_text.index('git-svn-id:').nil?
end
# When Rails had a svn repo there was a convention for authors: the committer
# put their names between brackets at the end of the commit or changelog
# message. For example:
#
#   Fix case-sensitive validates_uniqueness_of. Closes #11366 [miloops]
#
# Several authors may share the brackets, separated by a comma, a slash or an
# ampersand:
#
#   [Adam Milligan, Pratik]
#   [Rick Olson/Nicholas Seckar]
#   [Kevin Clark & Jeremy Hopple]
#
# Of course this is not robust, but it is the best we can get.
#
# message - the commit message or changelog line to inspect.
#
# Returns an Array with the bracketed pieces accepted by
# looks_like_an_author_name, empty if there is no bracketed group at the end
# of a line.
def extract_svn_authors_from_message(message)
  # The end-of-line anchor ($) is on purpose.
  bracketed = /\[([^\]]+)\]\s*$/.match(message)
  return [] unless bracketed

  pieces = bracketed[1].split(%r{\s*[,/&]\s*})
  pieces.select { |piece| looks_like_an_author_name(piece) }
end
# Returns the list of authors of a commit that came from svn.
#
# Sources are tried in order and the first one that yields anything wins:
# the commit message, then the CHANGELOG lines added by the commit, and
# finally the author recorded on the commit itself.
#
# commit - an object responding to #message and #author (whose #name is used).
#
# Returns an Array of normalized names without duplicates.
def extract_svn_authors(commit)
  candidates = extract_svn_authors_from_message(commit.message)
  candidates = extract_svn_authors_diffing(commit) if candidates.empty?
  candidates = [commit.author.name] if candidates.empty?

  candidates.map { |raw_name| normalize_name(raw_name) }.uniq
end
# Searches for author names in CHANGELOGs.
#
# Runs `git show` for the commit inside RAILS_DIR and keeps the added lines
# that start a bullet ("+ *...") within the diff of any file whose name ends
# in "changelog" (case-insensitive). Those lines are cached in $changelogs
# under the commit id, so each commit is diffed at most once.
#
# commit - an object responding to #id.
#
# Returns a flat Array with the author names extracted from those lines by
# extract_svn_authors_from_message.
def extract_svn_authors_diffing(commit)
  unless $changelogs.key?(commit.id)
    # Collect into a local and store it only once the whole diff has been
    # scanned, so an interrupted run never caches a partial entry.
    entries = []
    Dir.chdir(RAILS_DIR) do
      in_changelog = false
      # each_line rather than String#each: the latter exists only in Ruby 1.8.
      git_show(commit).each_line do |line|
        if line =~ /^diff --git/
          # A new file starts; assume it is not a changelog until the
          # "+++" header says otherwise.
          in_changelog = false
        elsif line =~ /^\+\+\+.*changelog$/i
          in_changelog = true
        elsif in_changelog && line =~ /^\+\s*\*/
          entries << line
        end
      end
    end
    $changelogs[commit.id] = entries
  end
  $changelogs[commit.id].map { |line| extract_svn_authors_from_message(line) }.flatten
end
# Shells out to `git show` for the given commit, in the current directory.
#
# commit - an object responding to #id.
#
# Returns the command's standard output as a String.
def git_show(commit)
  %x(git show #{commit.id})
end
# Author name extraction in svn commits returns a few strings we just ignore.
#
# str - a candidate author name.
#
# Returns false for the known non-names listed below, true for anything else.
def looks_like_an_author_name(str)
  case str
  when /\A\d+\z/ # side effect of changeset references such as [5684]
    false
  when /\A\s*\z/ # empty or whitespace only
    false
  when 'See rails ML', 'subject "Text::Format Licence Exception" on Oct 15'
    false
  when /RubyConf/ # example: RubyConf '05
    false
  when /^Includes duplicates of changes/ # example: Includes duplicates of changes from 1.1.4 - 1.2.3
    false
  else
    true
  end
end
# Rewrites every "user@domain" found in the given string as
# "user ~ at ~ domain".
#
# name - the String to process; it is not modified.
#
# Returns a new String.
def with_munged_email_addresses(name)
  name.gsub(/([\w.]+)@([\w.]+)/) { "#{$1} ~ at ~ #{$2}" }
end
# Prints one line per author, highest commit count first. Each line shows the
# count right-aligned in four columns followed by the author name, with any
# email address in the name munged.
#
# authors - a Hash mapping author name to commit count.
#
# Returns the sorted list of [name, count] pairs.
def print_authors(authors)
  ranking = authors.sort { |left, right| right[1] <=> left[1] }
  ranking.each do |author, commits|
    puts format("%4d %s", commits, with_munged_email_addresses(author))
  end
end
# Walks the commits of master in the repository at RAILS_DIR, in pages of 100,
# counts commits per normalized author name, and prints the resulting ranking.
#
# Commits that came from svn may credit several authors, and each of them gets
# the commit counted. For the rest, the commit author is counted as long as
# the name passes looks_like_an_author_name.
#
# Returns whatever print_authors returns.
def process_commits!
  tally = Hash.new(0)
  repo = Grit::Repo.new(RAILS_DIR)
  offset = 0 # number of commits already visited, used as the page start
  per_page = 100
  loop do
    batch = repo.commits('master', per_page, offset)
    break if batch.empty?

    batch.each do |commit|
      names =
        if commit_from_svn?(commit)
          extract_svn_authors(commit)
        else
          git_author = commit.author.name
          looks_like_an_author_name(git_author) ? [git_author] : []
        end
      names.each { |name| tally[normalize_name(name)] += 1 }
      offset += 1
    end
  end
  print_authors(tally)
end
# Script entry point: count the commits per author and print the ranking.
process_commits!
|