prepare for speedup

Sat, 04 Nov 2017 22:52:01 +0100

author
mdd
date
Sat, 04 Nov 2017 22:52:01 +0100
changeset 4
a7e9e7974c22
parent 3
569fa9a431b9
child 5
51e57e9f8db1

prepare for speedup

dupecheck.py file | annotate | diff | comparison | revisions
--- a/dupecheck.py	Sat Nov 04 22:34:12 2017 +0100
+++ b/dupecheck.py	Sat Nov 04 22:52:01 2017 +0100
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
 import difflib
 import os, sys
 
@@ -12,6 +13,7 @@
     #return difflib.SequenceMatcher(a=seq1.lower(), b=seq2.lower()).ratio()
     return difflib.SequenceMatcher(a=seq1, b=seq2).ratio()
 
+print("Reading files...")
 for root, subdirs, files in os.walk(BASEDIR):
     for filename in files:
         if filename.endswith(".ts"):
@@ -26,11 +28,14 @@
         elif filename.endswith(".mkv"):
             title = filename[:-4].lower()
             FILELIST.append([title, filename, root])
+print("%i files found, running duplicate testing loop" % len(FILELIST))
 
 
 for idx, item in enumerate(FILELIST):
     comparelist = FILELIST[idx+1:]
-    #print "%d %s (%d to compare)" % (idx, item[0], len(comparelist))
+    print("%d %s\033[K\r" % (idx, item[0]),
+        end='')
+    sys.stdout.flush()
     for idx2, item2 in enumerate(comparelist):
         if similarity(item[0], item2[0]) > 0.85:
             #print "possible duplicate %d %s" % (idx2, item2[0])
@@ -40,8 +45,12 @@
             DUPLICATES[key].append(
                 os.path.join(item2[2], item2[1]))
 
+print("\n\n\n")
+idx = 1
 for base in DUPLICATES.keys():
-    print base
+    print("Duplicate file set #%i" % idx)
+    print(base)
     for dup in DUPLICATES[base]:
-        print dup
-    print ""
\ No newline at end of file
+        print(dup)
+    print()
+    idx += 1

mercurial