blob: 6eaedfaaf9a3e1a66c98e8478308b2ab77fcc844 [file] [log] [blame] [edit]
# Copyright (c) 2011, Apple Inc. All rights reserved.
# Copyright (c) 2009, 2011, 2012 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for committer and reviewer validation.
import fnmatch
import json
from webkitpy.common.editdistance import edit_distance
from webkitpy.common.memoized import memoized
from webkitpy.common.system.filesystem import FileSystem
# The list of contributors have been moved to contributors.json
class Contributor(object):
def __init__(self, name, email_or_emails, irc_nickname_or_nicknames=None):
assert(name)
assert(email_or_emails)
self.full_name = name
if isinstance(email_or_emails, str):
self.emails = [email_or_emails]
else:
self.emails = email_or_emails
self.emails = map(lambda email: email.lower(), self.emails) # Emails are case-insensitive.
if isinstance(irc_nickname_or_nicknames, str):
self.irc_nicknames = [irc_nickname_or_nicknames]
else:
self.irc_nicknames = irc_nickname_or_nicknames
self.can_commit = False
self.can_review = False
def bugzilla_email(self):
# FIXME: We're assuming the first email is a valid bugzilla email,
# which might not be right.
return self.emails[0]
def __str__(self):
return unicode(self).encode('utf-8')
def __unicode__(self):
return '"%s" <%s>' % (self.full_name, self.emails[0])
def contains_string(self, search_string):
string = search_string.lower()
if string in self.full_name.lower():
return True
if self.irc_nicknames:
for nickname in self.irc_nicknames:
if string in nickname.lower():
return True
for email in self.emails:
if string in email:
return True
return False
def matches_glob(self, glob_string):
if fnmatch.fnmatch(self.full_name, glob_string):
return True
if self.irc_nicknames:
for nickname in self.irc_nicknames:
if fnmatch.fnmatch(nickname, glob_string):
return True
for email in self.emails:
if fnmatch.fnmatch(email, glob_string):
return True
return False
class Committer(Contributor):
def __init__(self, name, email_or_emails, irc_nickname=None):
Contributor.__init__(self, name, email_or_emails, irc_nickname)
self.can_commit = True
class Reviewer(Committer):
def __init__(self, name, email_or_emails, irc_nickname=None):
Committer.__init__(self, name, email_or_emails, irc_nickname)
self.can_review = True
class CommitterList(object):
# Committers and reviewers are passed in to allow easy testing
def __init__(self,
committers=[],
reviewers=[],
contributors=[]):
# FIXME: These arguments only exist for testing. Clean it up.
if not (committers or reviewers or contributors):
loaded_data = self.load_json()
contributors = loaded_data['Contributors']
committers = loaded_data['Committers']
reviewers = loaded_data['Reviewers']
self._contributors = contributors + committers + reviewers
self._committers = committers + reviewers
self._reviewers = reviewers
self._contributors_by_name = {}
self._accounts_by_email = {}
self._accounts_by_login = {}
@staticmethod
@memoized
def load_json():
filesystem = FileSystem()
json_path = filesystem.join(filesystem.dirname(filesystem.path_to_module('webkitpy.common.config')), 'contributors.json')
contributors = json.loads(filesystem.read_text_file(json_path))
return {
'Contributors': [Contributor(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Contributors'].iteritems()],
'Committers': [Committer(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Committers'].iteritems()],
'Reviewers': [Reviewer(name, data.get('emails'), data.get('nicks')) for name, data in contributors['Reviewers'].iteritems()],
}
def contributors(self):
return self._contributors
def committers(self):
return self._committers
def reviewers(self):
return self._reviewers
def _name_to_contributor_map(self):
if not len(self._contributors_by_name):
for contributor in self._contributors:
assert(contributor.full_name)
assert(contributor.full_name.lower() not in self._contributors_by_name) # We should never have duplicate names.
self._contributors_by_name[contributor.full_name.lower()] = contributor
return self._contributors_by_name
def _email_to_account_map(self):
if not len(self._accounts_by_email):
for account in self._contributors:
for email in account.emails:
assert(email not in self._accounts_by_email) # We should never have duplicate emails.
self._accounts_by_email[email] = account
return self._accounts_by_email
def _login_to_account_map(self):
if not len(self._accounts_by_login):
for account in self._contributors:
if account.emails:
login = account.bugzilla_email()
assert(login not in self._accounts_by_login) # We should never have duplicate emails.
self._accounts_by_login[login] = account
return self._accounts_by_login
def _committer_only(self, record):
if record and not record.can_commit:
return None
return record
def _reviewer_only(self, record):
if record and not record.can_review:
return None
return record
def contributor_by_irc_nickname(self, irc_nickname):
for contributor in self.contributors():
# FIXME: This should do case-insensitive comparison or assert that all IRC nicknames are in lowercase
if contributor.irc_nicknames and irc_nickname in contributor.irc_nicknames:
return contributor
return None
def contributors_by_search_string(self, string):
glob_matches = filter(lambda contributor: contributor.matches_glob(string), self.contributors())
return glob_matches or filter(lambda contributor: contributor.contains_string(string), self.contributors())
def contributors_by_email_username(self, string):
string = string + '@'
result = []
for contributor in self.contributors():
for email in contributor.emails:
if email.startswith(string):
result.append(contributor)
break
return result
def _contributor_name_shorthands(self, contributor):
if ' ' not in contributor.full_name:
return []
split_fullname = contributor.full_name.split()
first_name = split_fullname[0]
last_name = split_fullname[-1]
return first_name, last_name, first_name + last_name[0], first_name + ' ' + last_name[0]
def _tokenize_contributor_name(self, contributor):
full_name_in_lowercase = contributor.full_name.lower()
tokens = [full_name_in_lowercase] + full_name_in_lowercase.split()
if contributor.irc_nicknames:
return tokens + [nickname.lower() for nickname in contributor.irc_nicknames if len(nickname) > 5]
return tokens
def contributors_by_fuzzy_match(self, string):
string_in_lowercase = string.lower()
# 1. Exact match for fullname, email and irc_nicknames
account = self.contributor_by_name(string_in_lowercase) or self.contributor_by_email(string_in_lowercase) or self.contributor_by_irc_nickname(string_in_lowercase)
if account:
return [account], 0
# 2. Exact match for email username (before @)
accounts = self.contributors_by_email_username(string_in_lowercase)
if accounts and len(accounts) == 1:
return accounts, 0
# 3. Exact match for first name, last name, and first name + initial combinations such as "Dan B" and "Tim H"
accounts = [contributor for contributor in self.contributors() if string in self._contributor_name_shorthands(contributor)]
if accounts and len(accounts) == 1:
return accounts, 0
# 4. Finally, fuzzy-match using edit-distance
string = string_in_lowercase
contributorWithMinDistance = []
minDistance = len(string) / 2 - 1
for contributor in self.contributors():
tokens = self._tokenize_contributor_name(contributor)
editdistances = [edit_distance(token, string) for token in tokens if abs(len(token) - len(string)) <= minDistance]
if not editdistances:
continue
distance = min(editdistances)
if distance == minDistance:
contributorWithMinDistance.append(contributor)
elif distance < minDistance:
contributorWithMinDistance = [contributor]
minDistance = distance
if not len(contributorWithMinDistance):
return [], len(string)
return contributorWithMinDistance, minDistance
def contributor_by_email(self, email):
return self._email_to_account_map().get(email.lower()) if email else None
def contributor_by_name(self, name):
return self._name_to_contributor_map().get(name.lower()) if name else None
def committer_by_email(self, email):
return self._committer_only(self.contributor_by_email(email))
def committer_by_name(self, name):
return self._committer_only(self.contributor_by_name(name))
def reviewer_by_email(self, email):
return self._reviewer_only(self.contributor_by_email(email))
def reviewer_by_name(self, email):
return self._reviewer_only(self.contributor_by_name(email))