|
1 #!/usr/bin/env python |
|
2 |
|
3 import difflib |
|
4 import os, sys |
|
5 |
|
# Directory tree that is scanned recursively for recordings
# (a relative path, so it is resolved against the working directory).
BASEDIR = "../DREAMBOX"

# One [title, filename, directory] entry per .ts/.mkv file found under
# BASEDIR; the title is the lower-cased name used for comparison.
FILELIST = []

# Maps the path of a file to the list of paths of later files whose
# titles were judged similar to it.
DUPLICATES = {}
|
10 |
|
def similarity(seq1, seq2):
    """Return the difflib similarity ratio of two sequences.

    The result is a float between 0.0 (nothing in common) and 1.0
    (identical).  The comparison is case-sensitive; callers that want a
    case-insensitive match must normalize the inputs themselves.
    """
    matcher = difflib.SequenceMatcher(None, seq1, seq2)
    return matcher.ratio()
|
14 |
|
# Walk BASEDIR and append one [title, filename, directory] entry to FILELIST
# for every .ts and .mkv file.  The title is the lower-cased part of the
# file name that is later used for the similarity comparison.
for root, subdirs, files in os.walk(BASEDIR):
    for filename in files:
        if filename.endswith(".ts"):
            # .ts names are split on " - " and the first two fields are
            # dropped (presumably date/time and channel -- confirm against
            # the recorder's naming scheme); the remainder is the title.
            parts = filename.split(" - ")
            if len(parts) < 3:
                # Fewer than three fields: there is no remainder to use.
                # Joining the empty tail would give an empty title, and
                # empty titles compare as identical to each other, so
                # fall back to the whole file name instead.
                title = filename
            else:
                title = " - ".join(parts[2:])
            # Strip the ".ts" suffix and normalize case.
            title = title[:-3].lower()
            FILELIST.append([title, filename, root])
        elif filename.endswith(".mkv"):
            # .mkv files are compared by their full name without suffix.
            title = filename[:-4].lower()
            FILELIST.append([title, filename, root])
|
29 |
|
30 |
|
# Compare the title of every entry with the titles of all entries that
# follow it, so each unordered pair is checked exactly once.  Pairs whose
# similarity ratio exceeds 0.85 are recorded in DUPLICATES under the path
# of the earlier file.
for idx, item in enumerate(FILELIST):
    # Path of the earlier file; constant for the whole inner loop.
    key = os.path.join(item[2], item[1])
    for item2 in FILELIST[idx + 1:]:
        if similarity(item[0], item2[0]) > 0.85:
            # setdefault creates the list on the first hit only, without
            # building a key list for the membership test.
            DUPLICATES.setdefault(key, []).append(
                os.path.join(item2[2], item2[1]))
|
42 |
|
# Report: for each file with possible duplicates, print its path, then
# the path of every candidate, then an empty line as separator.
# Single-argument print(...) calls produce the same output under
# Python 2 and Python 3, whereas print statements only parse on Python 2.
for base in DUPLICATES:
    print(base)
    for dup in DUPLICATES[base]:
        print(dup)
    print("")