# HG changeset patch # User mdd # Date 1509832321 -3600 # Node ID a7e9e7974c2236b654ae756370eaa44c913a7064 # Parent 569fa9a431b9321c30c8ea3af619407358d88781 prepare for speedup diff -r 569fa9a431b9 -r a7e9e7974c22 dupecheck.py --- a/dupecheck.py Sat Nov 04 22:34:12 2017 +0100 +++ b/dupecheck.py Sat Nov 04 22:52:01 2017 +0100 @@ -1,5 +1,6 @@ #!/usr/bin/env python +from __future__ import print_function import difflib import os, sys @@ -12,6 +13,7 @@ #return difflib.SequenceMatcher(a=seq1.lower(), b=seq2.lower()).ratio() return difflib.SequenceMatcher(a=seq1, b=seq2).ratio() +print("Reading files...") for root, subdirs, files in os.walk(BASEDIR): for filename in files: if filename.endswith(".ts"): @@ -26,11 +28,14 @@ elif filename.endswith(".mkv"): title = filename[:-4].lower() FILELIST.append([title, filename, root]) +print("%i files found, running duplicate testing loop" % len(FILELIST)) for idx, item in enumerate(FILELIST): comparelist = FILELIST[idx+1:] - #print "%d %s (%d to compare)" % (idx, item[0], len(comparelist)) + print("%d %s\033[K\r" % (idx, item[0]), + end='') + sys.stdout.flush() for idx2, item2 in enumerate(comparelist): if similarity(item[0], item2[0]) > 0.85: #print "possible duplicate %d %s" % (idx2, item2[0]) @@ -40,8 +45,12 @@ DUPLICATES[key].append( os.path.join(item2[2], item2[1])) +print("\n\n\n") +idx = 1 for base in DUPLICATES.keys(): - print base + print("Duplicate file set #%i" % idx) + print(base) for dup in DUPLICATES[base]: - print dup - print "" \ No newline at end of file + print(dup) + print() + idx += 1