10 |
11 |
def similarity(seq1, seq2):
    """Return how alike two sequences are, as a ratio between 0.0 and 1.0.

    Args:
        seq1: First sequence (the callers in this script pass title strings).
        seq2: Second sequence to compare against ``seq1``.

    Returns:
        The ``difflib.SequenceMatcher`` ratio: 1.0 for identical sequences
        (including two empty ones) and 0.0 when nothing matches.
    """
    # The comparison is case-sensitive on purpose: no case folding happens
    # here, so callers that want case-insensitive matching must normalise
    # their inputs before calling.
    return difflib.SequenceMatcher(a=seq1, b=seq2).ratio()
14 |
15 |
|
# NOTE(review): this span is side-by-side diff residue -- every statement
# appears twice, prefixed with its old and its new line number, and the
# original indentation is lost.
#
# Purpose of the visible statements: walk BASEDIR recursively with os.walk and
# append one [title, filename, root] entry to FILELIST for every file whose
# name ends in ".ts" or ".mkv"; all other files are ignored.
16 print("Reading files...") |
15 for root, subdirs, files in os.walk(BASEDIR): |
17 for root, subdirs, files in os.walk(BASEDIR): |
16 for filename in files: |
18 for filename in files: |
17 if filename.endswith(".ts"): |
19 if filename.endswith(".ts"): |
# file_path is assigned here but not read anywhere in the visible lines.
18 file_path = os.path.join(root, filename) |
20 file_path = os.path.join(root, filename) |
19 title = filename.split(" - ") |
21 title = filename.split(" - ") |
# NOTE(review): the numbering jumps from old 19 / new 21 to old 24 / new 26, so
# four statements are missing from this view. Presumably they pick one element
# of the split list, because the next statement calls .lower() on a slice of
# `title`, which a list slice would not support -- confirm against the file.
# The [:-3] slice then drops the last three characters (the length of ".ts").
24 title = title[:-3].lower() |
26 title = title[:-3].lower() |
25 FILELIST.append([title, filename, root]) |
27 FILELIST.append([title, filename, root]) |
26 elif filename.endswith(".mkv"): |
28 elif filename.endswith(".mkv"): |
# For ".mkv" files the title is the whole filename minus its last four
# characters (the length of ".mkv"), lowercased.
27 title = filename[:-4].lower() |
29 title = filename[:-4].lower() |
28 FILELIST.append([title, filename, root]) |
30 FILELIST.append([title, filename, root]) |

31 print("%i files found, running duplicate testing loop" % len(FILELIST)) |
29 |
32 |
30 |
33 |
# Compare every FILELIST entry against every later entry, so each unordered
# pair is tested exactly once. Entries are [title, filename, root] lists.
# Pairs whose title similarity exceeds 0.85 are recorded in DUPLICATES,
# keyed by the full path of the earlier file.
for idx, item in enumerate(FILELIST):
    # "\033[K" erases to the end of the terminal line and "\r" moves the
    # cursor back to column 0, so each progress line overwrites the last.
    print("%d %s\033[K\r" % (idx, item[0]), end='')
    # No newline is printed, so flush explicitly to make progress visible.
    sys.stdout.flush()
    # The key depends only on the outer item; build it once per item.
    key = os.path.join(item[2], item[1])
    for item2 in FILELIST[idx + 1:]:
        if similarity(item[0], item2[0]) > 0.85:
            # setdefault creates the list on the first match for this key.
            DUPLICATES.setdefault(key, []).append(
                os.path.join(item2[2], item2[1]))
42 |
47 |
|
# Print the report: one numbered set per DUPLICATES key, listing the key
# (the first file of the set) followed by each recorded look-alike path.
# The leading blank lines separate the report from the progress output.
print("\n\n\n")
for set_number, (base, dups) in enumerate(DUPLICATES.items(), start=1):
    print("Duplicate file set #%i" % set_number)
    print(base)
    for dup in dups:
        print(dup)
    # Blank line between consecutive sets.
    print()