Sat, 04 Nov 2017 22:34:12 +0100
added filename duplicate checker
3 | 1 | #!/usr/bin/env python |
2 | ||
3 | import difflib | |
4 | import os, sys | |
5 | ||
# Root directory that is walked recursively when looking for recordings.
BASEDIR = "../DREAMBOX"

# One [title, filename, directory] entry per .ts/.mkv file found under BASEDIR.
FILELIST = []
# Maps a file's path to the paths of later files whose titles look similar.
DUPLICATES = {}
10 | ||
def similarity(seq1, seq2):
    """Return the difflib similarity ratio of two sequences.

    The comparison is case-sensitive: the inputs are passed to
    ``difflib.SequenceMatcher`` unchanged.

    Args:
        seq1: First sequence (a title string in this script).
        seq2: Second sequence to compare against ``seq1``.

    Returns:
        The value of ``SequenceMatcher.ratio()``: a float where 1.0 means
        the sequences are identical and 0.0 means nothing matches.
    """
    matcher = difflib.SequenceMatcher(None, seq1, seq2)
    return matcher.ratio()
14 | ||
def _title_from_filename(filename):
    """Return the lower-cased comparison title for a ``.ts`` file name.

    The name is split on " - ". When there are at least three fields, the
    first two are dropped and the rest is used as the title (presumably the
    leading fields are timestamp and channel of a recording -- confirm
    against real file names). With fewer than three fields the whole name
    is used; slicing off two fields in that case would leave an empty
    title, and every such file would then match every other one.

    Args:
        filename: File name ending in ".ts".

    Returns:
        The title without the ".ts" suffix, lower-cased.
    """
    stem = filename[:-3]  # drop the ".ts" suffix
    fields = stem.split(" - ")
    if len(fields) > 2:
        stem = " - ".join(fields[2:])
    return stem.lower()


# Collect a [title, filename, directory] entry for every .ts and .mkv file
# found anywhere below BASEDIR.
for root, subdirs, files in os.walk(BASEDIR):
    for filename in files:
        if filename.endswith(".ts"):
            FILELIST.append([_title_from_filename(filename), filename, root])
        elif filename.endswith(".mkv"):
            # .mkv names are used as-is, minus the extension.
            FILELIST.append([filename[:-4].lower(), filename, root])
29 | ||
30 | ||
# Titles whose similarity ratio exceeds this value are reported as
# possible duplicates.
SIMILARITY_THRESHOLD = 0.85

# Compare every entry with each entry after it, so each pair is checked
# exactly once. Matches are recorded under the path of the earlier entry.
for idx, item in enumerate(FILELIST):
    key = os.path.join(item[2], item[1])
    for item2 in FILELIST[idx + 1:]:
        if similarity(item[0], item2[0]) > SIMILARITY_THRESHOLD:
            # setdefault creates the list on the first match only, without
            # scanning a list of all keys on every hit.
            DUPLICATES.setdefault(key, []).append(
                os.path.join(item2[2], item2[1]))
42 | ||
# Report each file that has possible duplicates, followed by the paths of
# those duplicates and an empty line separating the groups.
# Single-argument parenthesised print behaves the same as the statement form.
for base, dups in DUPLICATES.items():
    print(base)
    for dup in dups:
        print(dup)
    print("")