dupecheck.py

changeset 4
a7e9e7974c22
parent 3
569fa9a431b9
child 5
51e57e9f8db1
equal deleted inserted replaced
3:569fa9a431b9 4:a7e9e7974c22
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 from __future__ import print_function
3 import difflib 4 import difflib
4 import os, sys 5 import os, sys
5 6
6 BASEDIR="../DREAMBOX" 7 BASEDIR="../DREAMBOX"
7 8
10 11
11 def similarity(seq1, seq2): 12 def similarity(seq1, seq2):
12 #return difflib.SequenceMatcher(a=seq1.lower(), b=seq2.lower()).ratio() 13 #return difflib.SequenceMatcher(a=seq1.lower(), b=seq2.lower()).ratio()
13 return difflib.SequenceMatcher(a=seq1, b=seq2).ratio() 14 return difflib.SequenceMatcher(a=seq1, b=seq2).ratio()
14 15
16 print("Reading files...")
15 for root, subdirs, files in os.walk(BASEDIR): 17 for root, subdirs, files in os.walk(BASEDIR):
16 for filename in files: 18 for filename in files:
17 if filename.endswith(".ts"): 19 if filename.endswith(".ts"):
18 file_path = os.path.join(root, filename) 20 file_path = os.path.join(root, filename)
19 title = filename.split(" - ") 21 title = filename.split(" - ")
24 title = title[:-3].lower() 26 title = title[:-3].lower()
25 FILELIST.append([title, filename, root]) 27 FILELIST.append([title, filename, root])
26 elif filename.endswith(".mkv"): 28 elif filename.endswith(".mkv"):
27 title = filename[:-4].lower() 29 title = filename[:-4].lower()
28 FILELIST.append([title, filename, root]) 30 FILELIST.append([title, filename, root])
31 print("%i files found, running duplicate testing loop" % len(FILELIST))
29 32
30 33
31 for idx, item in enumerate(FILELIST): 34 for idx, item in enumerate(FILELIST):
32 comparelist = FILELIST[idx+1:] 35 comparelist = FILELIST[idx+1:]
33 #print "%d %s (%d to compare)" % (idx, item[0], len(comparelist)) 36 print("%d %s\033[K\r" % (idx, item[0]),
37 end='')
38 sys.stdout.flush()
34 for idx2, item2 in enumerate(comparelist): 39 for idx2, item2 in enumerate(comparelist):
35 if similarity(item[0], item2[0]) > 0.85: 40 if similarity(item[0], item2[0]) > 0.85:
36 #print "possible duplicate %d %s" % (idx2, item2[0]) 41 #print "possible duplicate %d %s" % (idx2, item2[0])
37 key = os.path.join(item[2], item[1]) 42 key = os.path.join(item[2], item[1])
38 if not key in DUPLICATES.keys(): 43 if not key in DUPLICATES.keys():
39 DUPLICATES[key] = [] 44 DUPLICATES[key] = []
40 DUPLICATES[key].append( 45 DUPLICATES[key].append(
41 os.path.join(item2[2], item2[1])) 46 os.path.join(item2[2], item2[1]))
42 47
48 print("\n\n\n")
49 idx = 1
43 for base in DUPLICATES.keys(): 50 for base in DUPLICATES.keys():
44 print base 51 print("Duplicate file set #%i" % idx)
52 print(base)
45 for dup in DUPLICATES[base]: 53 for dup in DUPLICATES[base]:
46 print dup 54 print(dup)
47 print "" 55 print()
56 idx += 1

mercurial