summaryrefslogtreecommitdiffstats
path: root/webapp/codereview/intra_region_diff.py
diff options
context:
space:
mode:
Diffstat (limited to 'webapp/codereview/intra_region_diff.py')
-rw-r--r--webapp/codereview/intra_region_diff.py699
1 files changed, 699 insertions, 0 deletions
diff --git a/webapp/codereview/intra_region_diff.py b/webapp/codereview/intra_region_diff.py
new file mode 100644
index 0000000000..3383c708e8
--- /dev/null
+++ b/webapp/codereview/intra_region_diff.py
@@ -0,0 +1,699 @@
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Intra-region diff utilities.
+
+Intra-region diff highlights the blocks of code which have been changed or
+deleted within a region. So instead of highlighting the whole region marked as
+changed, the user can see what exactly was changed within that region.
+
+Terminology:
+ 'region' is a list of consecutive code lines.
+ 'word' is the unit of intra-region diff. Its definition is arbitrary based on
+ what we think as to be a good unit of difference between two regions.
+ 'block' is a small section of code within a region. It can span multiple
+ lines. There can be multiple non overlapping blocks within a region. A block
+ can potentially span the whole region.
+
+The blocks have two representations. One is of the format (offset1, offset2,
+size) which is returned by the SequenceMatcher to indicate a match of
+length 'size' starting at offset1 in the first/old line and starting at offset2
+in the second/new line. We convert this representation to a pair of tuples i.e.
+(offset1, size) and (offset2, size) for rendering each side of the diff
+separately. This latter representation is also more efficient for doing
+compaction of adjacent blocks which reduces the size of the HTML markup. See
+CompactBlocks for more details.
+
+SequenceMatcher always returns one special matching block at the end with
+contents (len(line1), len(line2), 0). We retain this special block as it
+simplifies for loops in rendering the last non-matching block. All functions
+which deal with the sequence of blocks assume presence of the special block at
+the end of the sequence and retain it.
+"""
+
+import cgi
+import difflib
+import re
+
+# Tag to begin a diff chunk.
+BEGIN_TAG = "<span class=\"%s\">"
+# Tag to end a diff block.
+END_TAG = "</span>"
+# Tag used for visual tab indication
+TAB_TAG = ("<span class=\"visualtab\" title=\"Visual tab indicator. "
+ "Change settings above to hide.\">&raquo;</span>")
+# Color scheme to govern the display properties of diff blocks and matching
+# blocks. Each value e.g. 'oldlight' corresponds to a CSS style.
+COLOR_SCHEME = {
+ 'old': {
+ 'match': 'oldlight',
+ 'diff': 'olddark',
+ 'bckgrnd': 'oldlight',
+ },
+ 'new': {
+ 'match': 'newlight',
+ 'diff': 'newdark',
+ 'bckgrnd': 'newlight',
+ },
+ 'oldmove': {
+ 'match': 'movelight',
+ 'diff': 'oldmovedark',
+ 'bckgrnd': 'movelight'
+ },
+ 'newmove': {
+ 'match': 'newlight',
+ 'diff': 'newdark',
+ 'bckgrnd': 'newlight'
+ },
+}
+# Regular expressions to tokenize lines. Default is 'b'.
+EXPRS = {
+ 'a': r'(\w+|[^\w\s]+|\s+)',
+ 'b': r'([A-Za-z0-9]+|[^A-Za-z0-9])',
+ 'c': r'([A-Za-z0-9_]+|[^A-Za-z0-9_])',
+ }
+# Maximum total characters in old and new lines for doing intra-region diffs.
+# Intra-region diff for larger regions is hard to comprehend and wastes CPU
+# time.
+MAX_TOTAL_LEN = 10000
+
+
+def ExpandTabs(text, tabsize=8, tab_marker=None):
+ """Expand tab characters in a string into spaces with an optional marker.
+
+ Args:
+ text: a string containing tab characters.
+ tabsize: the number of spaces that a tab represents
+ tab_marker: a character; if not None, we replace the first character
+ of each tab expansion with this.
+ """
+ tabpos = text.find("\t")
+ while tabpos >= 0:
+ fillwidth = tabsize - (tabpos % tabsize)
+ if fillwidth == 0:
+ fillwidth = tabsize
+ if tab_marker:
+ fill = tab_marker + " " * (fillwidth - 1)
+ else:
+ fill = " " * fillwidth
+ # We avoid str.replace in case tab_marker is \t
+ text = text[:tabpos] + fill + text[tabpos+1:]
+ tabpos = text.find("\t", tabpos + 1)
+ return text
+
+
+def Fold(text, limit=85, indent=5, offset=0, tabsize=8, mark_tabs=False):
+ """Break a long string into multiple lines.
+
+ Lines longer than 'limit' are broken up into pieces of at most
+ 'limit' characters; continuation lines start with 'indent' spaces.
+
+ 'offset' is used to indicate if 'text' itself doesn't align with
+ the beginning of line e.g. we are trying to Fold a line when we have
+ already printed 'offset' number of characters to the output.
+
+ This also translates tabs into 'tabsize' spaces. If 'mark_tabs' is true,
+ then we indicate the first character of each expanded tab visually.
+
+ Input and output are assumed to be in UTF-8; the computation is done
+ in Unicode. (Still not good enough if zero-width characters are
+ present.) If the input is not valid UTF-8, then the encoding is
+ passed through, potentially breaking up multi-byte characters.
+ We pass the line through cgi.escape before returning it.
+
+ A trailing newline is always stripped from the input first.
+ """
+ assert tabsize > 0, tabsize
+ if text.endswith("\n"):
+ text = text[:-1]
+ try:
+ text = unicode(text, "utf-8")
+ except:
+ pass
+ if "\t" in text:
+ # If mark_tabs is true, we retain one \t character as a marker during
+ # expansion so that we later replace it with an HTML snippet.
+ tab_marker = mark_tabs and "\t" or None
+ rest = text[indent-offset:]
+ text = text[:indent-offset] + ExpandTabs(rest, tabsize, tab_marker)
+ # Perform wrapping.
+ if len(text) > limit - offset:
+ parts = []
+ prefix = ""
+ i = 0
+ j = limit - offset
+ while i < len(text):
+ parts.append(prefix + text[i:j])
+ i = j
+ j += limit - indent
+ prefix = " " * indent
+ text = "\n".join(parts)
+ # Colorize tab markers (after calling escape)
+ text = cgi.escape(text)
+ text = text.replace("\t", TAB_TAG)
+ if isinstance(text, unicode):
+ return text.encode("utf-8", "replace")
+ return text
+
+
+def CompactBlocks(blocks):
+ """Compacts adjacent code blocks.
+
+ In many cases 2 adjacent blocks can be merged into one. This allows
+ to do some further processing on those blocks.
+
+ Args:
+ blocks: [(offset1, size), ...]
+
+ Returns:
+ A list with the same structure as the input with adjacent blocks
+ merged. However, the last block (which is always assumed to have
+ a zero size) is never merged. For example, the input
+ [(0, 2), (2, 8), (10, 5), (15, 0)]
+ will produce the output [(0, 15), (15, 0)].
+ """
+ if len(blocks) == 1:
+ return blocks
+ result = [blocks[0]]
+ for block in blocks[1:-1]:
+ last_start, last_len = result[-1]
+ curr_start, curr_len = block
+ if last_start + last_len == curr_start:
+ result[-1] = last_start, last_len + curr_len
+ else:
+ result.append(block)
+ result.append(blocks[-1])
+ return result
+
+
+def FilterBlocks(blocks, filter_func):
+ """Gets rid of any blocks if filter_func evaluates false for them.
+
+ Args:
+ blocks: [(offset1, offset2, size), ...]; must have at least 1 entry
+ filter_func: a boolean function taking a single argument of the form
+ (offset1, offset2, size)
+
+ Returns:
+ A list with the same structure with entries for which filter_func()
+ returns false removed. However, the last block is always included.
+ """
+ # We retain the 'special' block at the end.
+ res = [b for b in blocks[:-1] if filter_func(b)]
+ res.append(blocks[-1])
+ return res
+
+
+def GetDiffParams(expr='b', min_match_ratio=0.6, min_match_size=2, dbg=False):
+ """Returns a tuple of various parameters which affect intra region diffs.
+
+ Args:
+ expr: regular expression id to use to identify 'words' in the intra region
+ diff
+ min_match_ratio: minimum similarity between regions to qualify for intra
+ region diff
+ min_match_size: the smallest matching block size to use. Blocks smaller
+ than this are ignored.
+ dbg: to turn on generation of debugging information for the diff
+
+ Returns:
+ 4 tuple (expr, min_match_ratio, min_match_size, dbg) that can be used to
+ customize diff. It can be passed to functions like WordDiff and
+ IntraLineDiff.
+ """
+ assert expr in EXPRS
+ assert min_match_size in xrange(1,5)
+ assert min_match_ratio > 0.0 and min_match_ratio < 1.0
+ return (expr, min_match_ratio, min_match_size, dbg)
+
+
+def CanDoIRDiff(old_lines, new_lines):
+ """Tells if it would be worth computing the intra region diff.
+
+ Calculating IR diff is costly and is usually helpful only for small regions.
+ We use a heuristic that if the total number of characters is more than a
+ certain threshold then we assume it is not worth computing the IR diff.
+
+ Args:
+ old_lines: an array of strings containing old text
+ new_lines: an array of strings containing new text
+
+ Returns:
+ True if we think it is worth computing IR diff for the region defined
+ by old_lines and new_lines, False otherwise.
+
+ TODO: Let GetDiffParams handle MAX_TOTAL_LEN param also.
+ """
+ total_chars = (sum(len(line) for line in old_lines) +
+ sum(len(line) for line in new_lines))
+ return total_chars <= MAX_TOTAL_LEN
+
+
+def WordDiff(line1, line2, diff_params):
+ """Returns blocks with positions indiciating word level diffs.
+
+ Args:
+ line1: string representing the left part of the diff
+ line2: string representing the right part of the diff
+ diff_params: return value of GetDiffParams
+
+ Returns:
+ A tuple (blocks, ratio) where:
+ blocks: [(offset1, offset2, size), ...] such that
+ line1[offset1:offset1+size] == line2[offset2:offset2+size]
+ and the last block is always (len(line1), len(line2), 0)
+ ratio: a float giving the diff ratio computed by SequenceMatcher.
+ """
+ match_expr, min_match_ratio, min_match_size, dbg = diff_params
+ exp = EXPRS[match_expr]
+ # We want to split at proper character boundaries in UTF8 text.
+ try:
+ line1_u = unicode(line1, "utf8")
+ except:
+ line1_u = line1
+ try:
+ line2_u = unicode(line2, "utf8")
+ except:
+ line2_u = line2
+ def _ToUTF8(s):
+ if isinstance(s, unicode):
+ return s.encode("utf8")
+ return s
+ a = map(_ToUTF8, re.findall(exp, line1_u, re.U))
+ b = map(_ToUTF8, re.findall(exp, line2_u, re.U))
+ s = difflib.SequenceMatcher(None, a, b)
+ matching_blocks = s.get_matching_blocks()
+ ratio = s.ratio()
+ # Don't show intra region diffs if both lines are too different and there is
+ # more than one block of difference. If there is only one change then we
+ # still show the intra region diff regardless of how different the blocks
+ # are.
+ # Note: We compare len(matching_blocks) with 3 because one block of change
+ # results in 2 matching blocks. We add the one special block and we get 3
+ # matching blocks per one block of change.
+ if ratio < min_match_ratio and len(matching_blocks) > 3:
+ return ([(0, 0, 0)], ratio)
+ # For now convert to character level blocks because we already have
+ # the code to deal with folding across lines for character blocks.
+ # Create arrays lena an lenb which have cumulative word lengths
+ # corresponding to word positions in a and b
+ lena = []
+ last = 0
+ for w in a:
+ lena.append(last)
+ last += len(w)
+ lenb = []
+ last = 0
+ for w in b:
+ lenb.append(last)
+ last += len(w)
+ lena.append(len(line1))
+ lenb.append(len(line2))
+ # Convert to character blocks
+ blocks = []
+ for s1, s2, blen in matching_blocks[:-1]:
+ apos = lena[s1]
+ bpos = lenb[s2]
+ block_len = lena[s1+blen] - apos
+ blocks.append((apos, bpos, block_len))
+ # Recreate the special block.
+ blocks.append((len(line1), len(line2), 0))
+ # Filter any matching blocks which are smaller than the desired threshold.
+ # We don't remove matching blocks with only a newline character as doing so
+ # results in showing the matching newline character as non matching which
+ # doesn't look good.
+ blocks = FilterBlocks(blocks, lambda b: (b[2] >= min_match_size or
+ line1[b[0]:b[0]+b[2]] == '\n'))
+ return (blocks, ratio)
+
+
+def IntraLineDiff(line1, line2, diff_params, diff_func=WordDiff):
+ """Computes intraline diff blocks.
+
+ Args:
+ line1: string representing the left part of the diff
+ line2: string representing the right part of the diff
+ diff_params: return value of GetDiffParams
+ diff_func: a function whose signature matches that of WordDiff() above
+
+ Returns:
+ A tuple of (blocks1, blocks2) corresponding to line1 and line2.
+ Each element of the tuple is an array of (start_pos, length)
+ tuples denoting a diff block.
+ """
+ blocks, ratio = diff_func(line1, line2, diff_params)
+ blocks1 = [(start1, length) for (start1, start2, length) in blocks]
+ blocks2 = [(start2, length) for (start1, start2, length) in blocks]
+
+ return (blocks1, blocks2, ratio)
+
+
+def DumpDiff(blocks, line1, line2):
+ """Helper function to debug diff related problems.
+
+ Args:
+ blocks: [(offset1, offset2, size), ...]
+ line1: string representing the left part of the diff
+ line2: string representing the right part of the diff
+ """
+ for offset1, offset2, size in blocks:
+ print offset1, offset2, size
+ print offset1, size, ": ", line1[offset1:offset1+size]
+ print offset2, size, ": ", line2[offset2:offset2+size]
+
+
+def RenderIntraLineDiff(blocks, line, tag, dbg_info=None, limit=80, indent=5,
+ tabsize=8, mark_tabs=False):
+ """Renders the diff blocks returned by IntraLineDiff function.
+
+ Args:
+ blocks: [(start_pos, size), ...]
+ line: line of code on which the blocks are to be rendered.
+ tag: 'new' or 'old' to control the color scheme.
+ dbg_info: a string that holds debugging informaion header. Debug
+ information is rendered only if dbg_info is not None.
+ limit: folding limit to be passed to the Fold function.
+ indent: indentation size to be passed to the Fold function.
+ tabsize: the number of spaces that a tab represents
+ mark_tabs: if True, mark the first character of each expanded tab visually
+
+ Returns:
+ A tuple of two elements. First element is the rendered version of
+ the input 'line'. Second element tells if the line has a matching
+ newline character.
+ """
+ res = ""
+ prev_start, prev_len = 0, 0
+ has_newline = False
+ debug_info = dbg_info
+ if dbg_info:
+ debug_info += "\nBlock Count: %d\nBlocks: " % (len(blocks) - 1)
+ for curr_start, curr_len in blocks:
+ if dbg_info and curr_len > 0:
+ debug_info += Fold("\n(%d, %d):|%s|" %
+ (curr_start, curr_len,
+ line[curr_start:curr_start+curr_len]),
+ limit, indent, tabsize, mark_tabs)
+ res += FoldBlock(line, prev_start + prev_len, curr_start, limit, indent,
+ tag, 'diff', tabsize, mark_tabs)
+ res += FoldBlock(line, curr_start, curr_start + curr_len, limit, indent,
+ tag, 'match', tabsize, mark_tabs)
+ # TODO: This test should be out of loop rather than inside. Once we
+ # filter out some junk from blocks (e.g. some empty blocks) we should do
+ # this test only on the last matching block.
+ if line[curr_start:curr_start+curr_len].endswith('\n'):
+ has_newline = True
+ prev_start, prev_len = curr_start, curr_len
+ return (res, has_newline, debug_info)
+
+
+def FoldBlock(src, start, end, limit, indent, tag, btype, tabsize=8,
+ mark_tabs=False):
+ """Folds and renders a block.
+
+ Args:
+ src: line of code
+ start: starting position of the block within 'src'.
+ end: ending position of the block within 'src'.
+ limit: folding limit
+ indent: indentation to use for folding.
+ tag: 'new' or 'old' to control the color scheme.
+ btype: block type i.e. 'match' or 'diff' to control the color schme.
+ tabsize: the number of spaces that a tab represents
+ mark_tabs: if True, mark the first character of each expanded tab visually
+
+ Returns:
+ A string represeting the rendered block.
+ """
+ text = src[start:end]
+ # We ignore newlines because we do newline management ourselves.
+ # Any other new lines with at the end will be stripped off by the Fold
+ # method.
+ if start >= end or text == '\n':
+ return ""
+ fbegin, lend, nl_plus_indent = GetTags(tag, btype, indent)
+ # 'bol' is beginning of line
+ offset_from_bol = start % limit
+ res = ""
+ # If this is the first block of the line and this is not the first line then
+ # insert newline + indent. This special case is not dealt with in the for
+ # loop below.
+ if offset_from_bol == 0 and not start == 0:
+ res = nl_plus_indent
+ text = Fold(text, limit, 0, offset_from_bol, tabsize, mark_tabs)
+ folded_lines = text.split("\n")
+ for (j, l) in enumerate(folded_lines):
+ if l:
+ res += (fbegin + l + lend)
+ # Add new line plus indent except for the last line.
+ if j < len(folded_lines) - 1:
+ res += nl_plus_indent
+ return res
+
+
+def GetTags(tag, btype, indent):
+ """Returns various tags for rendering diff blocks.
+
+ Args:
+ tag: a key from COLOR_SCHEME
+ btype: 'match' or 'diff'
+ indent: indentation to use
+ Returns
+ A 3 tuple (begin_tag, end_tag, formatted_indent_block)
+ """
+ assert tag in COLOR_SCHEME
+ assert btype in ['match', 'diff']
+ fbegin = BEGIN_TAG % COLOR_SCHEME[tag][btype]
+ bbegin = BEGIN_TAG % COLOR_SCHEME[tag]['bckgrnd']
+ lend = END_TAG
+ nl_plus_indent = '\n'
+ if indent > 0:
+ nl_plus_indent += bbegin + cgi.escape(" "*indent) + lend
+ return fbegin, lend, nl_plus_indent
+
+
+def ConvertToSingleLine(lines):
+ """Transforms a sequence of strings into a single line.
+
+ Returns the state that can be used to reconstruct the original lines with
+ the newline separators placed at the original place.
+
+ Args:
+ lines: sequence of strings
+
+ Returns:
+ Returns (single_line, state) tuple. 'state' shouldn't be modified by the
+ caller. It is only used to pass to other functions which will do certain
+ operations on this state.
+
+ 'state' is an array containing a dictionary for each item in lines. Each
+ dictionary has two elements 'pos' and 'blocks'. 'pos' is the end position
+ of each line in the final converted string. 'blocks' is an array of blocks
+ for each line of code. These blocks are added using MarkBlock function.
+ """
+ state = []
+ total_length = 0
+ for l in lines:
+ total_length += len(l)
+ # TODO: Use a tuple instead.
+ state.append({ 'pos': total_length, # the line split point
+ 'blocks': [] # blocks which belong to this line
+ })
+ result = "".join(lines)
+ assert len(state) == len(lines)
+ return (result, state)
+
+
+def MarkBlock(state, begin, end):
+ """Marks a block on a region such that it doesn't cross line boundaries.
+
+ It is an operation that can be performed on the single line which was
+ returned by the ConvertToSingleLine function. This operation marks arbitrary
+ block [begin,end) on the text. It also ensures that if [begin,end) crosses
+ line boundaries in the original region then it splits the section up in 2 or
+ more blocks such that no block crosses the boundaries.
+
+ Args:
+ state: the state returned by ConvertToSingleLine function. The state
+ contained is modified by this function.
+ begin: Beginning of the block.
+ end: End of the block (exclusive).
+
+ Returns:
+ None.
+ """
+ # TODO: Make sure already existing blocks don't overlap
+ if begin == end:
+ return
+ last_pos = 0
+ for entry in state:
+ pos = entry['pos']
+ if begin >= last_pos and begin < pos:
+ if end < pos:
+ # block doesn't cross any line boundary
+ entry['blocks'].append((begin, end))
+ else:
+ # block crosses the line boundary
+ entry['blocks'].append((begin, pos))
+ MarkBlock(state, pos, end)
+ break
+ last_pos = pos
+
+
+def GetBlocks(state):
+ """Returns all the blocks corresponding to the lines in the region.
+
+ Args:
+ state: the state returned by ConvertToSingleLine().
+
+ Returns:
+ An array of [(start_pos, length), ..] with an entry for each line in the
+ region.
+ """
+ result = []
+ last_pos = 0
+ for entry in state:
+ pos = entry['pos']
+ # Calculate block start points from the beginning of individual lines.
+ blocks = [(s[0]-last_pos, s[1]-s[0]) for s in entry['blocks']]
+ # Add one end marker block.
+ blocks.append((pos-last_pos, 0))
+ result.append(blocks)
+ last_pos = pos
+ return result
+
+
+def IntraRegionDiff(old_lines, new_lines, diff_params):
+ """Computes intra region diff.
+
+ Args:
+ old_lines: array of strings
+ new_lines: array of strings
+ diff_params: return value of GetDiffParams
+
+ Returns:
+ A tuple (old_blocks, new_blocks) containing matching blocks for old and new
+ lines.
+ """
+ old_line, old_state = ConvertToSingleLine(old_lines)
+ new_line, new_state = ConvertToSingleLine(new_lines)
+ old_blocks, new_blocks, ratio = IntraLineDiff(old_line, new_line, diff_params)
+ for begin, length in old_blocks:
+ MarkBlock(old_state, begin, begin+length)
+ old_blocks = GetBlocks(old_state)
+
+ for begin, length in new_blocks:
+ MarkBlock(new_state, begin, begin+length)
+ new_blocks = GetBlocks(new_state)
+
+ return (old_blocks, new_blocks, ratio)
+
+
+def NormalizeBlocks(blocks, line):
+ """Normalizes block representation of an intra line diff.
+
+ One diff can have multiple representations. Some times the diff returned by
+ the difflib for similar text sections is different even within same region.
+ For example if 2 already indented lines were indented with one additional
+ space character, the difflib may return the non matching space character to
+ be any of the already existing spaces. So one line may show non matching
+ space character as the first space character and the second line may show it
+ to be the last space character. This is sometimes confusing. This is the
+ side effect of the new regular expression we are using in WordDiff for
+ identifying indvidual words. This regular expression ('b') treats a sequence
+ of punctuation and whitespace characters as individual characters. It has
+ some visual advantages for showing a character level punctuation change as
+ one character change rather than a group of character change.
+
+ Making the normalization too generic can have performance implications. So
+ this implementation of normalize blocks intends to handle only one case.
+ Let's say S represents the space character and () marks a matching block.
+ Then the normalize operation will do following:
+
+ SSSS(SS)(ABCD) => SSSS(SS)(ABCD)
+ (SS)SSSS(ABCD) => SSSS(SS)(ABCD)
+ (SSSS)SS(ABCD) => SS(SSSS)(ABCD)
+
+ and so on..
+
+ Args:
+ blocks: An array of (offset, len) tuples defined on 'line'. These blocks
+ mark the matching areas. Anything between these matching blocks is
+ considered non-matching.
+ line: The text string on which the blocks are defined.
+
+ Returns:
+ An array of (offset, len) tuples representing the same diff but in
+ normalized form.
+ """
+ result = []
+ prev_start, prev_len = blocks[0]
+ for curr_start, curr_len in blocks[1:]:
+ # Note: nm_ is a prefix for non matching and m_ is a prefix for matching.
+ m_len, nm_len = prev_len, curr_start - (prev_start+prev_len)
+ # This if condition checks if matching and non matching parts are greater
+ # than zero length and are comprised of spaces ONLY. The last condition
+ # deals with most of the observed cases of strange diffs.
+ # Note: curr_start - prev_start == m_l + nm_l
+ # So line[prev_start:curr_start] == matching_part + non_matching_part.
+ text = line[prev_start:curr_start]
+ if m_len > 0 and nm_len > 0 and text == ' ' * len(text):
+ # Move the matching block towards the end i.e. normalize.
+ result.append((prev_start + nm_len, m_len))
+ else:
+ # Keep the existing matching block.
+ result.append((prev_start, prev_len))
+ prev_start, prev_len = curr_start, curr_len
+ result.append(blocks[-1])
+ assert len(result) == len(blocks)
+ return result
+
+
+def RenderIntraRegionDiff(lines, diff_blocks, tag, ratio, limit=80, indent=5,
+ tabsize=8, mark_tabs=False, dbg=False):
+ """Renders intra region diff for one side.
+
+ Args:
+ lines: list of strings representing source code in the region
+ diff_blocks: blocks that were returned for this region by IntraRegionDiff()
+ tag: 'new' or 'old'
+ ratio: similarity ratio returned by the diff computing function
+ limit: folding limit
+ indent: indentation size
+ tabsize: the number of spaces that a tab represents
+ mark_tabs: if True, mark the first character of each expanded tab visually
+ dbg: indicates if debug information should be rendered
+
+ Returns:
+ A list of strings representing the rendered version of each item in input
+ 'lines'.
+ """
+ result = []
+ dbg_info = None
+ if dbg:
+ dbg_info = 'Ratio: %.1f' % ratio
+ for line, blocks in zip(lines, diff_blocks):
+ blocks = NormalizeBlocks(blocks, line)
+ blocks = CompactBlocks(blocks)
+ diff = RenderIntraLineDiff(blocks,
+ line,
+ tag,
+ dbg_info=dbg_info,
+ limit=limit,
+ indent=indent,
+ tabsize=tabsize,
+ mark_tabs=mark_tabs)
+ result.append(diff)
+ assert len(result) == len(lines)
+ return result