git_blame_counter
This commit is contained in:
parent
54e815a804
commit
50253d414c
202
dotfiles/lib/python/git_blame_counter.py
Executable file
202
dotfiles/lib/python/git_blame_counter.py
Executable file
@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
|
||||
def segment(iterable, segment_length):
    """Yield the items of ``iterable`` in lists of up to ``segment_length``.

    Args:
        iterable: Any iterable. It is consumed lazily, one segment at a time.
        segment_length: Maximum number of items per yielded list, or ``None``
            to disable segmenting.

    Yields:
        When ``segment_length`` is ``None``: ``iterable`` itself, exactly
        once and unconsumed.  Otherwise: successive lists of at most
        ``segment_length`` items; the last list may be shorter.  Nothing is
        yielded for an empty input or a non-positive ``segment_length``.
    """
    # Local import: the module header does not import itertools.
    from itertools import islice

    if segment_length is None:
        yield iterable
        # A plain ``return`` ends the generator.  ``raise StopIteration``
        # inside a generator body is turned into RuntimeError by PEP 479.
        return

    if segment_length <= 0:
        # islice() rejects negative stops; an empty range produced no
        # segments before, so keep yielding nothing.
        return

    # iter() lets callers pass lists and other containers, not only objects
    # that already expose the iterator protocol.
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, segment_length))
        if not chunk:
            return
        yield chunk
|
||||
|
||||
|
||||
def build_file_extension_re(file_extensions):
    """Build a regular expression source matching the given file extensions.

    Args:
        file_extensions: Iterable of extension strings without the leading
            dot, e.g. ``['py', 'tmpl']``.  Each entry is inserted into the
            pattern verbatim (it is not escaped), so an entry may itself be
            a regular-expression fragment.

    Returns:
        The pattern source as a string, e.g. ``.*\\.(?:py|tmpl)``.  The
        pattern carries no end anchor.
    """
    # Raw literals keep ``\.`` a regex escape rather than an (invalid)
    # string escape sequence.
    return r'.*\.(?:' + '|'.join(file_extensions) + r')'
|
||||
|
||||
|
||||
class BlameCounter(object):
    """Tally ``git blame`` lines per committer for files below the current directory.

    Files are discovered with ``os.walk('.')``, filtered by path and filename
    regular expressions, checked against ``git ls-files`` and then blamed.
    Totals accumulate in ``blame_line_count_map`` (committer -> line count).
    """

    DIVIDER = '------------------------------'

    # Captures the text between an opening parenthesis and the first run of
    # four digits, without trailing whitespace.  Presumably this is the
    # author name that precedes the timestamp in default ``git blame``
    # output -- NOTE(review): a name that itself contains four consecutive
    # digits would be cut short.
    committer_matcher = re.compile(r'\((.*?)\s*[0-9]{4}')

    def __init__(
        self,
        search_expressions=(),
        ignore_expressions=(),
        filename_re=r'.*\.(?:py|tmpl)',
        chunk_size=None,
    ):
        """Compile the filters and start with an empty tally.

        Args:
            search_expressions: Regular expressions that must ALL be found in
                a file's joined path for the file to be counted.  ``None`` is
                treated as "no expressions".
            ignore_expressions: Regular expressions; a directory whose name
                matches ANY of them is not descended into.  ``None`` is
                treated as "no expressions".
            filename_re: Regular expression searched for in the bare
                filename.  It is applied with ``search`` and the default has
                no end anchor.
            chunk_size: When set, ``count_blame_lines`` processes files in
                groups of this size and prints rankings after each group.
        """
        # ``or ()`` tolerates None, which optparse hands over for an
        # "append" option that was never given and has no default.
        self.path_matchers = [
            re.compile(search_expression)
            for search_expression in search_expressions or ()
        ]
        self.ignore_matchers = [
            re.compile(ignore_expression)
            for ignore_expression in ignore_expressions or ()
        ]
        self.filename_matcher = re.compile(filename_re)
        self.chunk_size = chunk_size
        self.blame_line_count_map = {}

    def match_path_and_filename(self, path, filename):
        """Return True when the file passes every path filter and the filename filter."""
        filepath = os.path.join(path, filename)
        return all(
            bool(path_matcher.search(filepath))
            for path_matcher in self.path_matchers
        ) and bool(self.filename_matcher.search(filename))

    def _is_ignored_directory(self, directory_name):
        """Return True when any ignore expression is found in ``directory_name``."""
        return any(
            ignore_matcher.search(directory_name)
            for ignore_matcher in self.ignore_matchers
        )

    def get_matching_files(self):
        """Yield the path of every file under ``.`` that passes the filters."""
        for directory_path, directory_names, filenames in os.walk('.'):
            # Prune by slice assignment: os.walk only skips directories that
            # are removed from this very list object, and rebuilding the list
            # in one step tests every entry (deleting entries while looping
            # over the list skips the entry after each deletion).
            directory_names[:] = [
                directory_name
                for directory_name in directory_names
                if not self._is_ignored_directory(directory_name)
            ]
            for filename in filenames:
                if self.match_path_and_filename(directory_path, filename):
                    yield os.path.join(directory_path, filename)

    def _is_tracked(self, filename):
        """Return True when ``git ls-files --error-unmatch`` exits with status 0.

        Raises:
            OSError: if the ``git`` executable cannot be started.
        """
        # Argument list, no shell: the filename is handed to git untouched,
        # whatever spaces or shell metacharacters it contains.
        checker = subprocess.Popen(
            ['git', 'ls-files', '--error-unmatch', '--', filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # communicate() drains both pipes, so the child can never block on a
        # full pipe buffer; the output itself is not needed.
        checker.communicate()
        return checker.returncode == 0

    def git_blame_files(self, filenames):
        """Yield ``(filename, blame_output)`` for each filename that passes the ls-files check.

        Args:
            filenames: Iterable of file paths.

        Yields:
            Tuples of the filename and the text written to stdout by
            ``git blame`` for it.  Filenames for which the ``git ls-files``
            check fails are skipped.
        """
        for filename in filenames:
            if not self._is_tracked(filename):
                continue
            blame_output = subprocess.Popen(
                ['git', 'blame', '--', filename],
                stdout=subprocess.PIPE,
            ).communicate()[0]
            if not isinstance(blame_output, str):
                # Python 3 hands back bytes; the parsing code splits and
                # matches text.  Undecodable bytes are replaced, not fatal.
                blame_output = blame_output.decode('utf-8', 'replace')
            yield (filename, blame_output)

    def count_blame_lines(self):
        """Blame every matching file and add the results to ``blame_line_count_map``.

        With a truthy ``chunk_size`` the files are handled in groups of that
        size and the top 50 committers are printed after each group.
        """
        for blame_output_chunk in segment(
            self.git_blame_files(self.get_matching_files()),
            self.chunk_size
        ):
            self._count_blame_lines(blame_output_chunk)
            if self.chunk_size:
                self.print_results(
                    max_committers=50,
                    min_blame_lines=None
                )

    def _iter_committers(self, blame_output):
        """Yield the captured committer for each line of ``blame_output`` that matches."""
        for line in blame_output.split('\n'):
            match = self.committer_matcher.search(line)
            if match:
                yield match.group(1)

    def _count_blame_lines(self, blame_outputs):
        """Add one to the committer's total for every matching blame line.

        Args:
            blame_outputs: Iterable of ``(filename, blame_output)`` tuples;
                the filename is not used here.
        """
        for _, blame_output in blame_outputs:
            for committer in self._iter_committers(blame_output):
                self.blame_line_count_map[committer] = (
                    self.blame_line_count_map.get(committer, 0) + 1
                )

    def get_blame_lines_in_files_by_comitters(self):
        """Blame every matching file and count lines per committer per file.

        Returns:
            A dict mapping committer -> {filename: line count}.  This does
            not touch ``blame_line_count_map``.
        """
        blame_count_in_files_by_committer = {}
        for filename, blame_output in self.git_blame_files(self.get_matching_files()):
            for committer in self._iter_committers(blame_output):
                committer_blame_lines = blame_count_in_files_by_committer.setdefault(
                    committer, {},
                )
                committer_blame_lines[filename] = (
                    committer_blame_lines.get(filename, 0) + 1
                )
        return blame_count_in_files_by_committer

    def print_results(self, max_committers=None, min_blame_lines=None):
        """Print the divider followed by committers ranked by line count, highest first.

        Args:
            max_committers: Stop once this many ranks have been visited;
                ``None`` means no limit.
            min_blame_lines: Only print committers with strictly more lines
                than this; ``None`` prints everyone.  Skipped committers
                still occupy their rank.
        """
        print(self.DIVIDER)
        ranked_committers = sorted(
            self.blame_line_count_map.items(),
            key=lambda x: x[1],
            reverse=True
        )
        for rank, (committer, blame_lines) in enumerate(ranked_committers):
            if max_committers is not None and rank == max_committers:
                return
            if min_blame_lines is None or blame_lines > min_blame_lines:
                # One pre-formatted string gives identical output from the
                # Python 2 print statement and the Python 3 print function.
                print('%d %s :  %s' % (rank + 1, committer, blame_lines))
|
||||
|
||||
|
||||
def build_option_parser():
    """Return the ``optparse.OptionParser`` describing the command line.

    Both regular-expression options default to an empty list, so their
    values can always be iterated even when the option was not given.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        '--search-re',
        action='append',
        # Without a default, optparse leaves an unused "append" option at
        # None, which cannot be iterated when the matchers are compiled.
        default=[],
        dest='search_expressions',
        help='A regular expression to use when inspecting filepaths'
    )
    parser.add_option(
        '--ignore-re',
        action='append',
        default=[],
        dest='ignore_expressions',
        help='Ignore directories matching this re.'
    )
    parser.add_option(
        '-x',
        action='append',
        dest='file_extensions',
        help=('Search for filenames with the given file extension. '
              'Can be used multiple times.')
    )
    parser.add_option(
        '--chunk-size',
        dest='chunk_size',
        type='int',
        help='Print the rankings at intervals of CHUNK_SIZE files.'
    )
    parser.add_option(
        '--committer-lines',
        dest='committer_lines',
        action='store_true',
        default=False,
        help=('Count blame lines for committer by file.')
    )
    return parser


def sort_committer_lines(blame_lines_in_files_by_committers):
    """Order per-committer, per-file blame counts from largest to smallest.

    Args:
        blame_lines_in_files_by_committers: dict mapping
            committer -> {filename: line count}.

    Returns:
        A list of ``(committer, [(filename, count), ...])`` tuples.
        Committers are ordered by their total line count, highest first, and
        each committer's files by their count, highest first.
    """
    committers_by_total = sorted(
        blame_lines_in_files_by_committers.items(),
        key=lambda committer_tuple: sum(committer_tuple[1].values()),
        reverse=True
    )
    return [
        (
            committer,
            sorted(
                blame_lines_by_file.items(),
                key=lambda file_tuple: file_tuple[1],
                reverse=True
            ),
        )
        for committer, blame_lines_by_file in committers_by_total
    ]


def print_committer_lines(sorted_committer_lines):
    """Print each committer with their total, then their files with per-file counts.

    Args:
        sorted_committer_lines: The list returned by ``sort_committer_lines``.
    """
    for committer, file_counts in sorted_committer_lines:
        print(BlameCounter.DIVIDER)
        print('%s :  %s' % (committer, sum(count for _, count in file_counts)))
        for filename, count in file_counts:
            print('    %s :  %s' % (filename, count))


def main(argv=None):
    """Parse the command line, run the blame count and print the report.

    Args:
        argv: Argument list to parse; ``None`` lets optparse read
            ``sys.argv[1:]``.
    """
    (namespace, _) = build_option_parser().parse_args(argv)

    blame_counter_build_kwargs = {
        'chunk_size': namespace.chunk_size,
        'search_expressions': namespace.search_expressions,
        'ignore_expressions': namespace.ignore_expressions
    }
    # Only override the filename filter when -x was given, so the counter's
    # own default stays in effect otherwise.
    if namespace.file_extensions:
        blame_counter_build_kwargs['filename_re'] = build_file_extension_re(
            namespace.file_extensions
        )

    blame_counter = BlameCounter(**blame_counter_build_kwargs)
    if namespace.committer_lines:
        # Print the per-file breakdown; this used to stop in a debugger
        # breakpoint without ever writing the result out.
        print_committer_lines(
            sort_committer_lines(
                blame_counter.get_blame_lines_in_files_by_comitters()
            )
        )
    else:
        blame_counter.count_blame_lines()
        blame_counter.print_results()


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user