If anyone wants to play with it, here's a preliminary patch:
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -270,31 +270,49 @@
def findrenames(repo, added, removed, threshold):
'''find renamed files -- yields (before, after, score) tuples'''
+ copies = {}
ctx = repo['.']
- for a in added:
- aa = repo.wread(a)
- bestname, bestscore = None, threshold
- for r in removed:
- if r not in ctx:
- continue
- rr = ctx.filectx(r).data()
+ for r in removed:
+ if r not in ctx:
+ continue
+ fctx = ctx.filectx(r)
+ if threshold == 100:
+ n = fctx.node()
+ fparents = fctx.filelog().parents(n)
+ fparents.sort()
+ partialhash = _sha(fparents[0]).update(fparents[1])
+ def score(t):
+ s = partialhash.copy()
+ s.update(t)
+ h = s.digest()
+ if h == n:
+ return 100
+ return 0
+ else:
+ rr = fctx.data()
+ def score(t):
+ # bdiff.blocks() returns blocks of matching lines
+ # count the number of bytes in each
+ equal = 0
+ alines = mdiff.splitnewlines(aa)
+ matches = bdiff.blocks(aa, rr)
+ for x1, x2, y1, y2 in matches:
+ for line in alines[x1:x2]:
+ equal += len(line)
- # bdiff.blocks() returns blocks of matching lines
- # count the number of bytes in each
- equal = 0
- alines = mdiff.splitnewlines(aa)
- matches = bdiff.blocks(aa, rr)
- for x1,x2,y1,y2 in matches:
- for line in alines[x1:x2]:
- equal += len(line)
+ lengths = len(aa) + len(rr)
+ if lengths:
+ return equal*2.0 / lengths
+ return 0
- lengths = len(aa) + len(rr)
- if lengths:
- myscore = equal*2.0 / lengths
- if myscore >= bestscore:
- bestname, bestscore = r, myscore
- if bestname:
- yield bestname, a, bestscore
+ for a in added:
+ bestscore = copies.get(a, (None, threshold))[1]
+ aa = repo.wread(a)
+ myscore = score(aa)
+ if myscore >= bestscore:
+ copies[a] = (r, myscore)
+ for k, v in copies.iteritems():
+ yield v[0], k, v[1]
def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
if dry_run is None:
|