Fri, 24 Nov 2017 23:11:58 +0100
added option parser and automatic file rename feature
#!/usr/bin/env python

from __future__ import print_function

import difflib
import os
import sys

# Directory tree that is scanned for recordings.
BASEDIR = "../DREAMBOX"

# Two titles are treated as duplicates when their difflib similarity ratio
# is strictly greater than this value.
SIMILARITY_THRESHOLD = 0.85


def extract_title(filename):
    """Return the lower-cased comparison title for *filename*.

    ``.ts`` names are split on " - ".  When there are more than two fields,
    the first two are dropped (presumably recording timestamp and channel in
    Dreambox-style names -- TODO confirm) and the rest is the title.  With
    one or two fields the whole name without extension is used; previously a
    two-field name produced an empty title, which made all such files look
    like duplicates of each other.

    ``.mkv`` names are used whole, without the extension.

    Returns None for any other extension.
    """
    if filename.endswith(".ts"):
        stem = filename[:-3]
        fields = stem.split(" - ")
        if len(fields) > 2:
            stem = " - ".join(fields[2:])
        return stem.lower()
    if filename.endswith(".mkv"):
        return filename[:-4].lower()
    return None


def collect_files(basedir):
    """Walk *basedir* and return a list of ``[title, filename, root]``.

    Only files for which extract_title() yields a title (``.ts`` and
    ``.mkv``) are included.
    """
    entries = []
    for root, _subdirs, files in os.walk(basedir):
        for filename in files:
            title = extract_title(filename)
            if title is not None:
                entries.append([title, filename, root])
    return entries


def find_duplicates(filelist, threshold=SIMILARITY_THRESHOLD):
    """Group entries of *filelist* whose titles are similar.

    *filelist* holds ``[title, filename, root]`` entries; falsy entries are
    skipped.  Returns a dict mapping the path of the first file of each group
    to the list of paths considered its duplicates.  An entry that has been
    assigned to a group is not compared again.  *filelist* is not modified.

    A one-line progress indicator is written to stdout while scanning.
    """
    duplicates = {}
    # Falsy entries start out as "consumed" so they are never compared.
    consumed = [not entry for entry in filelist]
    total = len(filelist)
    for idx, entry in enumerate(filelist):
        if consumed[idx]:
            continue
        title, filename, root = entry
        # "\r" + "\033[K" rewrites the same terminal line on every step.
        print("\r%d %s\033[K" % (idx, title),
              end='')
        sys.stdout.flush()
        key = os.path.join(root, filename)
        for idx2 in range(idx + 1, total):
            if consumed[idx2]:
                continue
            other = filelist[idx2]
            matcher = difflib.SequenceMatcher(a=title, b=other[0])
            # real_quick_ratio() and quick_ratio() are cheap upper bounds on
            # ratio(), so they can only reject pairs ratio() would reject too.
            if (matcher.real_quick_ratio() > threshold
                    and matcher.quick_ratio() > threshold
                    and matcher.ratio() > threshold):
                duplicates.setdefault(key, []).append(
                    os.path.join(other[2], other[1]))
                # Mark the found duplicate so it is not scanned again.
                consumed[idx2] = True
    return duplicates


def print_report(duplicates):
    """Print every duplicate set: a numbered header, the base file, its duplicates."""
    for number, (base, dups) in enumerate(duplicates.items(), 1):
        print("Duplicate file set #%i" % number)
        print(base)
        for dup in dups:
            print(dup)
        print()


print("Reading files...")
FILELIST = collect_files(BASEDIR)
print("%i files found, running duplicate testing loop" % len(FILELIST))

DUPLICATES = find_duplicates(FILELIST)

print("\n\n\n")
print_report(DUPLICATES)