git_blame_counter
This commit is contained in:
		
							
								
								
									
										202
									
								
								dotfiles/lib/python/git_blame_counter.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										202
									
								
								dotfiles/lib/python/git_blame_counter.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,202 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
import optparse
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import subprocess
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def segment(iterable, segment_length):
    """Yield the items of ``iterable`` in lists of ``segment_length`` items.

    Args:
        iterable: Any iterable (iterator, generator, or sequence).
        segment_length: Maximum number of items per yielded list, or
            ``None`` to yield ``iterable`` itself exactly once, unchunked.

    Yields:
        Lists holding up to ``segment_length`` consecutive items; the last
        list may be shorter. Nothing is yielded once the input is exhausted
        (or at all when ``segment_length`` is not positive).
    """
    if segment_length is None:
        yield iterable
        # A plain return ends the generator; raising StopIteration inside a
        # generator body is turned into RuntimeError by PEP 479.
        return

    # iter() lets callers pass sequences as well as iterators, and next()
    # works on both Python 2 and Python 3.
    iterator = iter(iterable)
    while True:
        chunk = []
        while len(chunk) < segment_length:
            try:
                chunk.append(next(iterator))
            except StopIteration:
                break
        if not chunk:
            return
        yield chunk
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def build_file_extension_re(file_extensions):
    """Build a regex source string matching filenames with given extensions.

    Args:
        file_extensions: Iterable of extensions without the leading dot,
            e.g. ``['py', 'tmpl']``. They are treated as literal text.

    Returns:
        A pattern string that matches a filename ending in ``.<extension>``
        for any of the given extensions.
    """
    # Escape each extension so characters such as "+" (e.g. "c++") are
    # matched literally instead of being parsed as regex operators.
    alternatives = '|'.join(
        re.escape(extension) for extension in file_extensions
    )
    # Anchor at the end so that "py" does not also accept "pyc" or "py.bak".
    return r'.*\.(?:' + alternatives + r')$'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BlameCounter(object):
    """Count ``git blame`` lines per committer for files under the cwd.

    Files are discovered with ``os.walk('.')``, filtered by filename and
    path regular expressions, checked against ``git ls-files`` and then
    passed to ``git blame``. Committer names are extracted from the blame
    output with ``committer_matcher``.
    """

    DIVIDER = '------------------------------'
    # Captures the text between the first "(" on a blame line and the first
    # run of four digits that follows it, without trailing whitespace.
    committer_matcher = re.compile(r'\((.*?)\s*[0-9]{4}')

    def __init__(
        self,
        search_expressions=(),
        ignore_expressions=(),
        filename_re=r'.*\.(?:py|tmpl)',
        chunk_size=None,
    ):
        """Compile the filters and initialise an empty tally.

        Args:
            search_expressions: Regex sources; a file is kept only if every
                one of them is found in its joined path. ``None`` is
                treated as "no expressions".
            ignore_expressions: Regex sources; a directory whose name
                matches any of them is not descended into. ``None`` is
                treated as "no expressions".
            filename_re: Regex source searched for in the bare filename.
            chunk_size: Number of files handed to each counting pass in
                ``count_blame_lines``; ``None`` processes all files in a
                single pass.
        """
        # optparse yields None for an "append" option that was never given,
        # so tolerate None instead of failing while iterating over it.
        self.path_matchers = [
            re.compile(search_expression)
            for search_expression in search_expressions or ()
        ]
        self.ignore_matchers = [
            re.compile(ignore_expression)
            for ignore_expression in ignore_expressions or ()
        ]
        self.filename_matcher = re.compile(filename_re)
        self.chunk_size = chunk_size
        self.blame_line_count_map = {}

    def match_path_and_filename(self, path, filename):
        """Return True if ``filename`` in ``path`` passes all filters.

        The filename must match ``filename_matcher`` and the joined path
        must match every matcher in ``path_matchers``.
        """
        if not self.filename_matcher.search(filename):
            return False
        filepath = os.path.join(path, filename)
        return all(
            path_matcher.search(filepath)
            for path_matcher in self.path_matchers
        )

    def _is_ignored_directory(self, directory_name):
        """Return True if any ignore matcher is found in the name."""
        return any(
            ignore_matcher.search(directory_name)
            for ignore_matcher in self.ignore_matchers
        )

    def get_matching_files(self):
        """Yield paths (relative to ``.``) of files passing the filters."""
        for directory_path, directory_names, filenames in os.walk('.'):
            # Prune with slice assignment: os.walk only honours in-place
            # changes, and deleting entries while looping over the same
            # list would skip the entry following each removed one.
            directory_names[:] = [
                directory_name
                for directory_name in directory_names
                if not self._is_ignored_directory(directory_name)
            ]
            for filename in filenames:
                if self.match_path_and_filename(directory_path, filename):
                    yield os.path.join(directory_path, filename)

    @staticmethod
    def _to_text(output):
        """Return subprocess output as ``str``.

        On Python 3 the pipe yields bytes, which are decoded as UTF-8 with
        undecodable bytes replaced; a value that is already ``str`` (as on
        Python 2) is returned unchanged.
        """
        if isinstance(output, str):
            return output
        return output.decode('utf-8', 'replace')

    def git_blame_files(self, filenames):
        """Yield ``(filename, blame_output)`` for each file known to git.

        Files for which ``git ls-files --error-unmatch`` exits non-zero
        are skipped.
        """
        with open(os.devnull, 'w') as devnull:
            for filename in filenames:
                # Pass an argument list (no shell) so that filenames with
                # spaces or shell metacharacters are handled literally.
                not_tracked = subprocess.call(
                    ['git', 'ls-files', '--error-unmatch', '--', filename],
                    stdout=devnull,
                    stderr=devnull,
                )
                if not_tracked:
                    continue
                blame_output = subprocess.Popen(
                    ['git', 'blame', '--', filename],
                    stdout=subprocess.PIPE,
                ).communicate()[0]
                yield (filename, self._to_text(blame_output))

    def _iter_committers(self, blame_output):
        """Yield the committer captured from each matching blame line."""
        for line in blame_output.split('\n'):
            match = self.committer_matcher.search(line)
            if match:
                yield match.group(1)

    def count_blame_lines(self):
        """Blame all matching files and add the results to the tally.

        When ``chunk_size`` is set, the ranking of the top 50 committers
        is printed after each chunk of files has been counted.
        """
        for blame_output_chunk in segment(
            self.git_blame_files(self.get_matching_files()),
            self.chunk_size
        ):
            self._count_blame_lines(blame_output_chunk)
            if self.chunk_size:
                self.print_results(
                    max_committers=50,
                    min_blame_lines=None
                )

    def _count_blame_lines(self, blame_outputs):
        """Add one to the tally for every attributed line.

        Args:
            blame_outputs: Iterable of ``(filename, blame_output)`` pairs.
        """
        for _, blame_output in blame_outputs:
            for committer in self._iter_committers(blame_output):
                self.blame_line_count_map[committer] = (
                    self.blame_line_count_map.get(committer, 0) + 1
                )

    def get_blame_lines_in_files_by_comitters(self):
        """Return ``{committer: {filename: line_count}}`` for matching files."""
        blame_count_in_files_by_committer = {}
        blamed_files = self.git_blame_files(self.get_matching_files())
        for filename, blame_output in blamed_files:
            for committer in self._iter_committers(blame_output):
                committer_blame_lines = (
                    blame_count_in_files_by_committer.setdefault(committer, {})
                )
                committer_blame_lines[filename] = (
                    committer_blame_lines.get(filename, 0) + 1
                )
        return blame_count_in_files_by_committer

    def print_results(self, max_committers=None, min_blame_lines=None):
        """Print the divider followed by committers ranked by line count.

        Args:
            max_committers: Stop after this many ranks; ``None`` means no
                limit.
            min_blame_lines: Only print committers with strictly more
                lines than this; ``None`` prints everyone.
        """
        # Single-argument print() calls produce the same output on
        # Python 2 (print statement) and Python 3 (print function).
        print(self.DIVIDER)
        ranked = sorted(
            self.blame_line_count_map.items(),
            key=lambda item: item[1],
            reverse=True
        )
        for rank, (committer, blame_lines) in enumerate(ranked):
            # With max_committers=None this never compares equal.
            if rank == max_committers:
                return
            if min_blame_lines is None or blame_lines > min_blame_lines:
                print('%s %s :  %s' % (rank + 1, committer, blame_lines))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
    """Parse command-line options and print blame statistics.

    Without ``--committer-lines`` the per-committer totals are counted and
    printed. With it, each committer is printed (highest total first)
    followed by that committer's per-file line counts (highest first).
    """
    import operator

    parser = optparse.OptionParser()
    parser.add_option(
        '--search-re',
        action='append',
        # Without a default, optparse leaves this as None when the option
        # is never given, which cannot be iterated over.
        default=[],
        dest='search_expressions',
        help='A regular expression to use when inspecting filepaths'
    )
    parser.add_option(
        '--ignore-re',
        action='append',
        default=[],
        dest='ignore_expressions',
        help='Ignore directories matching this re.'
    )
    parser.add_option(
        '-x',
        action='append',
        dest='file_extensions',
        help=('Search for filenames with the given file extension. '
              'Can be used multiple times.')
    )
    parser.add_option(
        '--chunk-size',
        dest='chunk_size',
        type=int,
        help='Print the rankings at intervals of CHUNK_SIZE files.'
    )
    parser.add_option(
        '--committer-lines',
        dest='committer_lines',
        action='store_true',
        default=False,
        help=('Count blame lines for committer by file.')
    )

    (namespace, _) = parser.parse_args()

    blame_counter_build_kwargs = {
        'chunk_size': namespace.chunk_size,
        'search_expressions': namespace.search_expressions,
        'ignore_expressions': namespace.ignore_expressions
    }
    # Only override the filename filter when extensions were requested, so
    # that BlameCounter's own default applies otherwise.
    if namespace.file_extensions:
        blame_counter_build_kwargs['filename_re'] = build_file_extension_re(
            namespace.file_extensions
        )

    blame_counter = BlameCounter(**blame_counter_build_kwargs)
    if not namespace.committer_lines:
        blame_counter.count_blame_lines()
        blame_counter.print_results()
        return

    blame_lines_in_files_by_committers = (
        blame_counter.get_blame_lines_in_files_by_comitters()
    )
    committers_by_total = sorted(
        blame_lines_in_files_by_committers.items(),
        key=lambda item: sum(item[1].values()),
        reverse=True
    )
    # Print the report instead of dropping into a debugger session.
    for committer, blame_lines_by_file in committers_by_total:
        print(BlameCounter.DIVIDER)
        print('%s : %s' % (committer, sum(blame_lines_by_file.values())))
        files_by_count = sorted(
            blame_lines_by_file.items(),
            key=operator.itemgetter(1),
            reverse=True
        )
        for filename, blame_count in files_by_count:
            print('    %s : %s' % (filename, blame_count))


if __name__ == '__main__':
    main()
 | 
			
		||||
		Reference in New Issue
	
	Block a user