[git-file-format] Add --lines flag to only format modified lines

Change-Id: Iab6b8316eab6a2d7c9ce7253dcda53f8fd7ee1aa
diff --git a/git-file-format b/git-file-format
index b29771c..1a2422c 100755
--- a/git-file-format
+++ b/git-file-format
@@ -18,6 +18,41 @@
 import git_utils
 import paths
 
+
+class Command(object):
+    """A formatter invocation template, optionally aware of line ranges."""
+
+    def __init__(self, cmd, rangefn=None):
+        """Stores the pieces needed to build a commandline later.
+
+        Args:
+            cmd: Sequence of strings forming the fixed part of the
+                commandline; the tool must take a bare filename as its
+                final argument.
+            rangefn: Optional callable mapping a sequence of line ranges
+                (in the format Make() documents) to extra commandline
+                entries.
+        """
+        self._rangefn = rangefn
+        self._cmd = tuple(cmd)
+
+    def Make(self, fname, ranges=()):
+        """Builds the commandline that formats one file.
+
+        Args:
+            fname: The file to run the command on.
+            ranges: Optional sequence of 1-indexed
+                (start-line, num-modified-lines) pairs describing the
+                modified lines. Has no effect on commands that were
+                created without a rangefn.
+        Returns:
+            A new list of strings: the template, then any range flags,
+            then fname.
+        """
+        range_args = self._rangefn(ranges) if ranges and self._rangefn else ()
+        return list(self._cmd) + list(range_args) + [fname]
+
+
 host_platform = "%s-%s" % (
     platform.system().lower().replace("darwin", "mac"),
     {
@@ -33,14 +68,19 @@
 CHECK_HEADER_GUARDS_TOOL = os.path.join(paths.FUCHSIA_ROOT, "scripts", "style",
                                         "check-header-guards.py")
 
-CLANG_CMD = [
-    CLANG_TOOL, "-style=file", "-fallback-style=Google", "-sort-includes",
-    "-i"
-]
-DART_CMD = [DART_TOOL, "-w"]
-GN_CMD = [GN_TOOL, "format"]
-GO_CMD = [GO_TOOL, "-w"]
-FIX_HEADER_GUARDS_COMMAND = [CHECK_HEADER_GUARDS_TOOL, "--fix"]
+# clang-format takes inclusive 1-indexed ranges as -lines=<first>:<last>.
+CLANG_CMD = Command(
+    [CLANG_TOOL, "-style=file", "-fallback-style=Google", "-sort-includes",
+     "-i"],
+    rangefn=lambda ranges: [
+        '-lines=%d:%d' % (first, first + n - 1) for first, n in ranges])
+DART_CMD = Command([DART_TOOL, "-w"])
+GN_CMD = Command([GN_TOOL, "format"])
+GO_CMD = Command([GO_TOOL, "-w"])
+FIX_HEADER_GUARDS_COMMAND = Command([CHECK_HEADER_GUARDS_TOOL, "--fix"])
+# yapf takes inclusive 1-indexed ranges as --lines=<first>-<last>.
+PYTHON_CMD = Command(["yapf", "-i"], rangefn=lambda ranges: [
+    '--lines=%d-%d' % (first, first + n - 1) for first, n in ranges])
 
 EXT_TO_COMMANDS = {
     ".cc": [CLANG_CMD],
@@ -52,6 +92,7 @@
     ".h": [FIX_HEADER_GUARDS_COMMAND, CLANG_CMD],
     ".hh": [CLANG_CMD],
     ".hpp": [CLANG_CMD],
+    ".py": [PYTHON_CMD],
     ".ts": [CLANG_CMD],
 }
 
@@ -71,6 +112,12 @@
         default=False,
         help="tell me what you're doing")
     parser.add_argument(
+        "--lines",
+        dest="lines",
+        action="store_true",
+        default=False,
+        help="only format modified lines (for supported languages)")
+    parser.add_argument(
         "--all",
         dest="all",
         action="store_true",
@@ -79,10 +126,13 @@
     args = parser.parse_args()
 
     # Find the files to be formatted.
+    ranges = {}
     if args.all:
         files = git_utils.get_all_files()
     else:
         files = git_utils.get_diff_files()
+        if args.lines:
+            ranges = git_utils.get_modified_lines(files)
 
     if args.verbose:
         print
@@ -92,7 +142,7 @@
             return
 
         for file in files:
-            print " - " + file
+            print " - " + file + " " + ', '.join(map(repr, ranges.get(file, ())))
 
     # Run the formatters.
     if args.dry_run:
@@ -110,12 +160,18 @@
 
         _, extension = os.path.splitext(file)
         if extension not in EXT_TO_COMMANDS:
-            continue
+            # Sniff the first line only for a python #! header
+            with open(file, 'r') as fp:
+                head = fp.readline(80)
+                if head.startswith('#!') and 'python' in head:
+                    extension = '.py'
+            if extension not in EXT_TO_COMMANDS:
+                continue
 
         count += 1
         cmds = EXT_TO_COMMANDS[extension]
         for cmd in cmds:
-            cmd = cmd + [file]
+            cmd = cmd.Make(file, ranges=ranges.get(file, ()))
             if args.dry_run or args.verbose:
                 print cmd
 
diff --git a/git_utils.py b/git_utils.py
index d215269..8393255 100644
--- a/git_utils.py
+++ b/git_utils.py
@@ -2,8 +2,10 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+import collections
 import os
 import os.path
+import re
 import subprocess
 
 
@@ -56,6 +58,69 @@
     paths = filter(len, subprocess.check_output(list_command).split("\n"))
     return [ os.path.join(git_root_path, x) for x in paths ]
 
+
+def get_modified_lines(paths):
+    """Returns ranges of lines modified in the specified files.
+
+    Args:
+        paths: A sequence of filenames to check for modified lines. Typically
+               a subset of the paths returned by get_diff_files().
+
+    Returns:
+        A dict mapping elements of `paths` to sequences of 1-indexed
+        (start-line-number, num-modified-lines) tuples. Files that show up
+        in the diff without being listed in `paths` are ignored.
+
+    Raises:
+        subprocess.CalledProcessError: If the git command exits non-zero.
+    """
+    # For a line like |+++ b/path/to/file.py|, extracts the filename
+    # "path/to/file.py" in group 1, intentionally skipping the 'b/' component.
+    filename_line_re = re.compile(r'^\+\+\+ [^/]+/(.*)')
+
+    # For a line like |@@ -9,1 +10,2 @@ text| or |@@ -11,0 +12 @@ text|,
+    # extracts the second field ('10,2' or '12' in the examples) in group 1.
+    line_number_re = re.compile(r'^@@ -[0-9,]+ \+([0-9,]+) @@')
+
+    git_root_path = get_git_root()
+
+    # Diff output may be large, so stream the output rather than using
+    # communicate(). list() lets `paths` be any sequence, not just a list.
+    diff_command = ["git", "diff-index", "-U0", _get_diff_base()] + list(paths)
+    p = subprocess.Popen(diff_command, stdout=subprocess.PIPE)
+
+    # All input paths get entries, even if there are no diffs.
+    paths_to_ranges = collections.OrderedDict((path, []) for path in paths)
+
+    ranges = None  # Current file's entry in `paths_to_ranges`, if any.
+    in_header = False  # True from a 'diff ' line until that file's first hunk.
+    with p.stdout:  # 'with' will close stdout on any exception
+        for line in p.stdout:
+            if line.startswith('diff '):
+                ranges, in_header = None, True
+                continue
+            # Past the header, a '+++ ' line is added content, not a filename.
+            m = in_header and filename_line_re.match(line)
+            if m:
+                # Keys are absolute paths; unrequested files yield None.
+                fname = os.path.join(git_root_path, m.group(1))
+                ranges = paths_to_ranges.get(fname)
+                continue
+            m = line_number_re.match(line)
+            if m:
+                in_header = False
+                # A missing count means the range contains a single line.
+                start, _, count = m.group(1).partition(',')
+                count = int(count or 1)
+                if ranges is not None and count > 0:  # Zero: range deleted.
+                    ranges.append((int(start), count))
+    if p.wait():
+        # Non-zero exit status.
+        raise subprocess.CalledProcessError(p.returncode, diff_command)
+
+    return paths_to_ranges
+
+
 def get_all_files():
     """Returns absolute paths to all files in the git repo under the current
     working directory.