forked from colonelpanic/dotfiles
git_blame_counter
This commit is contained in:
parent
54e815a804
commit
50253d414c
202
dotfiles/lib/python/git_blame_counter.py
Executable file
202
dotfiles/lib/python/git_blame_counter.py
Executable file
@ -0,0 +1,202 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
|
def segment(iterable, segment_length):
|
||||||
|
if segment_length is None:
|
||||||
|
yield iterable
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
|
def yield_length():
|
||||||
|
for _ in xrange(segment_length):
|
||||||
|
yield iterable.next()
|
||||||
|
while True:
|
||||||
|
segment = list(yield_length())
|
||||||
|
if not segment:
|
||||||
|
raise StopIteration
|
||||||
|
yield segment
|
||||||
|
|
||||||
|
|
||||||
|
def build_file_extension_re(file_extensions):
|
||||||
|
return '.*\.(?:' + '|'.join(file_extensions) + ')'
|
||||||
|
|
||||||
|
|
||||||
|
class BlameCounter(object):
|
||||||
|
|
||||||
|
DIVIDER = '------------------------------'
|
||||||
|
committer_matcher = re.compile('\((.*?)\s*[0-9]{4}')
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
search_expressions=(),
|
||||||
|
ignore_expressions=(),
|
||||||
|
filename_re='.*\.(?:py|tmpl)',
|
||||||
|
chunk_size=None,
|
||||||
|
):
|
||||||
|
self.path_matchers = [
|
||||||
|
re.compile(search_expression)
|
||||||
|
for search_expression in search_expressions
|
||||||
|
]
|
||||||
|
self.ignore_matchers = [
|
||||||
|
re.compile(ignore_expression)
|
||||||
|
for ignore_expression in ignore_expressions
|
||||||
|
]
|
||||||
|
self.filename_matcher = re.compile(filename_re)
|
||||||
|
self.chunk_size = chunk_size
|
||||||
|
self.blame_line_count_map = {}
|
||||||
|
|
||||||
|
def match_path_and_filename(self, path, filename):
|
||||||
|
filepath = os.path.join(path, filename)
|
||||||
|
return all(
|
||||||
|
bool(path_matcher.search(filepath)) for path_matcher in self.path_matchers
|
||||||
|
) and bool(self.filename_matcher.search(filename))
|
||||||
|
|
||||||
|
def get_matching_files(self):
|
||||||
|
for directory_path, directory_names, filenames in os.walk('.'):
|
||||||
|
for directory_name in directory_names:
|
||||||
|
if any(
|
||||||
|
ignore_matcher.search(directory_name)
|
||||||
|
for ignore_matcher in self.ignore_matchers
|
||||||
|
):
|
||||||
|
del directory_names[directory_names.index(directory_name)]
|
||||||
|
for filename in filenames:
|
||||||
|
if self.match_path_and_filename(directory_path, filename):
|
||||||
|
yield os.path.join(directory_path, filename)
|
||||||
|
|
||||||
|
def git_blame_files(self, filenames):
|
||||||
|
for filename in filenames:
|
||||||
|
if subprocess.call(
|
||||||
|
['git ls-files %s --error-unmatch' % filename],
|
||||||
|
shell=True,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
):
|
||||||
|
continue
|
||||||
|
yield (filename, subprocess.Popen(
|
||||||
|
['git', 'blame', filename],
|
||||||
|
stdout=subprocess.PIPE
|
||||||
|
).communicate()[0])
|
||||||
|
|
||||||
|
def count_blame_lines(self):
|
||||||
|
for blame_output_chunk in segment(
|
||||||
|
self.git_blame_files(self.get_matching_files()),
|
||||||
|
self.chunk_size
|
||||||
|
):
|
||||||
|
self._count_blame_lines(blame_output_chunk)
|
||||||
|
if self.chunk_size:
|
||||||
|
self.print_results(
|
||||||
|
max_committers=50,
|
||||||
|
min_blame_lines=None
|
||||||
|
)
|
||||||
|
|
||||||
|
def _count_blame_lines(self, blame_outputs):
|
||||||
|
for _, blame_output in blame_outputs:
|
||||||
|
for line in blame_output.split('\n'):
|
||||||
|
match = self.committer_matcher.search(line)
|
||||||
|
if match:
|
||||||
|
committer = match.group(1)
|
||||||
|
self.blame_line_count_map[committer] = \
|
||||||
|
self.blame_line_count_map.setdefault(committer, 0) + 1
|
||||||
|
|
||||||
|
def get_blame_lines_in_files_by_comitters(self):
|
||||||
|
blame_count_in_files_by_committer = {}
|
||||||
|
for filename, blame_output in self.git_blame_files(self.get_matching_files()):
|
||||||
|
for line in blame_output.split('\n'):
|
||||||
|
match = self.committer_matcher.search(line)
|
||||||
|
if match:
|
||||||
|
committer = match.group(1)
|
||||||
|
committer_blame_lines = blame_count_in_files_by_committer.setdefault(
|
||||||
|
committer, {},
|
||||||
|
)
|
||||||
|
committer_blame_lines[filename] = committer_blame_lines.setdefault(
|
||||||
|
filename, 0,
|
||||||
|
) + 1
|
||||||
|
return blame_count_in_files_by_committer
|
||||||
|
|
||||||
|
def print_results(self, max_committers=None, min_blame_lines=None):
|
||||||
|
print self.DIVIDER
|
||||||
|
for (rank, (committer, blame_lines)) in enumerate(
|
||||||
|
sorted(
|
||||||
|
self.blame_line_count_map.iteritems(),
|
||||||
|
key=lambda x: x[1],
|
||||||
|
reverse=True
|
||||||
|
)
|
||||||
|
):
|
||||||
|
if rank is not None and rank == max_committers:
|
||||||
|
return
|
||||||
|
if min_blame_lines is None or blame_lines > min_blame_lines:
|
||||||
|
print str(rank + 1), committer, ': ', blame_lines
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
parser = optparse.OptionParser()
|
||||||
|
parser.add_option(
|
||||||
|
'--search-re',
|
||||||
|
action='append',
|
||||||
|
dest='search_expressions',
|
||||||
|
help='A regular expression to use when inspecting filepaths'
|
||||||
|
)
|
||||||
|
parser.add_option(
|
||||||
|
'--ignore-re',
|
||||||
|
action='append',
|
||||||
|
default=[],
|
||||||
|
dest='ignore_expressions',
|
||||||
|
help='Ignore directories matching this re.'
|
||||||
|
)
|
||||||
|
parser.add_option(
|
||||||
|
'-x',
|
||||||
|
action='append',
|
||||||
|
dest='file_extensions',
|
||||||
|
help=('Search for filenames with the given file extension. '
|
||||||
|
'Can be used multiple times.')
|
||||||
|
)
|
||||||
|
parser.add_option(
|
||||||
|
'--chunk-size',
|
||||||
|
dest='chunk_size',
|
||||||
|
type=int,
|
||||||
|
help='Print the rankings at intervals of CHUNK_SIZE files.'
|
||||||
|
)
|
||||||
|
parser.add_option(
|
||||||
|
'--committer-lines',
|
||||||
|
dest='committer_lines',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
help=('Count blame lines for committer by file.')
|
||||||
|
)
|
||||||
|
|
||||||
|
(namespace, _) = parser.parse_args()
|
||||||
|
|
||||||
|
blame_counter_build_kwargs = {
|
||||||
|
'chunk_size': namespace.chunk_size,
|
||||||
|
'search_expressions': namespace.search_expressions,
|
||||||
|
'ignore_expressions': namespace.ignore_expressions
|
||||||
|
}
|
||||||
|
if namespace.file_extensions:
|
||||||
|
blame_counter_build_kwargs['filename_re'] = build_file_extension_re(
|
||||||
|
namespace.file_extensions
|
||||||
|
)
|
||||||
|
|
||||||
|
blame_counter = BlameCounter(**blame_counter_build_kwargs)
|
||||||
|
if namespace.committer_lines:
|
||||||
|
import operator
|
||||||
|
|
||||||
|
def sum_of_comitter_lines(committer_tuple):
|
||||||
|
_, blame_lines_by_file = committer_tuple
|
||||||
|
return sum(blame_count for filename, blame_count in blame_lines_by_file.iteritems())
|
||||||
|
blame_lines_in_files_by_committers = blame_counter.get_blame_lines_in_files_by_comitters()
|
||||||
|
blame_lines_in_files_by_comitters_sorted_by_total_count = sorted(
|
||||||
|
blame_lines_in_files_by_committers.iteritems(),
|
||||||
|
key=sum_of_comitter_lines,
|
||||||
|
reverse=True
|
||||||
|
)
|
||||||
|
sorted_blame_lines_in_files_by_comitters = [
|
||||||
|
(comitter, sorted(blame_lines_by_file.iteritems(), key=operator.itemgetter(1), reverse=True))
|
||||||
|
for comitter, blame_lines_by_file in blame_lines_in_files_by_comitters_sorted_by_total_count
|
||||||
|
]
|
||||||
|
import ipdb; ipdb.set_trace()
|
||||||
|
else:
|
||||||
|
blame_counter.count_blame_lines()
|
||||||
|
blame_counter.print_results()
|
Loading…
Reference in New Issue
Block a user