23 while nbytes >= 1024 and i < len(suffixes)-1: |
24 while nbytes >= 1024 and i < len(suffixes)-1: |
24 nbytes /= 1024. |
25 nbytes /= 1024. |
25 i += 1 |
26 i += 1 |
26 f = ('%.2f' % nbytes).rstrip('0').rstrip('.') |
27 f = ('%.2f' % nbytes).rstrip('0').rstrip('.') |
27 return '%s %s' % (f, suffixes[i]) |
28 return '%s %s' % (f, suffixes[i]) |
|
29 |
|
def replace_all(text, dic):
    """
    Return ``text`` with every key of ``dic`` replaced by its value.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later pair operates on the result of the
    earlier ones. An empty dictionary returns ``text`` unchanged.
    """
    # dict.items() is available on Python 2 and 3 alike, whereas
    # iteritems() only exists on Python 2.
    for old, new in dic.items():
        text = text.replace(old, new)
    return text
28 |
34 |
29 class dupechecker(object): |
35 class dupechecker(object): |
30 """ |
36 """ |
31 Simple class to scan multiple directories recursive, |
37 Simple class to scan multiple directories recursive, |
32 build a list of movie filenames. |
38 build a list of movie filenames. |
63 title = title[:-3].lower() |
69 title = title[:-3].lower() |
64 self.filelist.append([title, filename, root, ext]) |
70 self.filelist.append([title, filename, root, ext]) |
65 elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']: |
71 elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']: |
66 title = filename[:-4].lower() |
72 title = filename[:-4].lower() |
67 self.filelist.append([title, filename, root, ext]) |
73 self.filelist.append([title, filename, root, ext]) |
|
74 elif ext in extra: |
|
75 title = filename[:-4].lower() |
|
76 self.filelist.append([title, filename, root, ext]) |
|
77 |
|
def fixnames(self):
    """
    Search for defect filenames and replace illegal characters.

    Only entries of self.filelist with the extension '.mkv' or '.txt'
    are looked at. Each entry is a list [title, filename, root, ext].
    In the filename 'ß' becomes 'ss', and every character that is not
    a letter, digit, space, German umlaut or one of . _ - ( ) & ' is
    replaced by '-'. Files whose name changes are renamed inside their
    directory and the old name is printed.

    A file is skipped (with a message) when the cleaned name already
    exists, because os.rename could otherwise overwrite that file.
    After a successful rename the filename stored in the entry is
    updated so that it keeps pointing at the file on disk; the title
    field of the entry is left untouched.
    """
    import re
    # Whitelist of allowed characters; compiled once for all entries.
    illegal = re.compile(r'[^A-Za-z0-9\.\_\-\(\)\&öäüÖÄÜ\' ]')
    for item in self.filelist:
        if item[3] not in ['.mkv', '.txt']:
            continue
        # Umlauts are kept as they are (the whitelist allows them),
        # only 'ß' is transliterated.
        cleanfn = replace_all(item[1], {
            'ß': 'ss',
        })
        cleanfn = illegal.sub('-', cleanfn)
        if item[1] == cleanfn:
            continue
        source = os.path.join(item[2], item[1])
        target = os.path.join(item[2], cleanfn)
        if os.path.exists(target):
            # Two different names can clean up to the same result;
            # never replace a file that is already there.
            print ("%s: skipped, %s already exists" % (item[1], cleanfn))
            continue
        print (item[1])
        os.rename(source, target)
        # Keep the scan list in sync with the file system.
        item[1] = cleanfn
68 |
101 |
69 def statistics(self): |
102 def statistics(self): |
70 """ |
103 """ |
71 Summarize disk usage and print stats about found filetypes |
104 Summarize disk usage and print stats about found filetypes |
72 """ |
105 """ |
80 item[2], item[1])).st_size |
113 item[2], item[1])).st_size |
81 print ("%5s %6s %10s" % ( |
114 print ("%5s %6s %10s" % ( |
82 "File:", |
115 "File:", |
83 "Count:", |
116 "Count:", |
84 "Size:")) |
117 "Size:")) |
|
118 sum_count = 0 |
|
119 sum_size = 0.0 |
85 for ext in stats.keys(): |
120 for ext in stats.keys(): |
|
121 sum_count += stats[ext][0] |
|
122 sum_size += stats[ext][1] |
86 print ("%5s %6i %10s" % ( |
123 print ("%5s %6i %10s" % ( |
87 ext, stats[ext][0], |
124 ext, stats[ext][0], |
88 humansize(stats[ext][1]))) |
125 humansize(stats[ext][1]))) |
|
126 print ("%5s %6i %10s" % ( |
|
127 "TOTAL", sum_count, |
|
128 humansize(sum_size))) |
89 |
129 |
90 |
130 |
91 def analyze(self): |
131 def analyze(self): |
92 """ |
132 """ |
93 Analyze the scanlist for duplicates |
133 Analyze the scanlist for duplicates |
142 help='filename duplicate threshold 0.1 < ratio 1.0 (default 0.85)') |
182 help='filename duplicate threshold 0.1 < ratio 1.0 (default 0.85)') |
143 parser.add_argument('--difflib', action='store_true', default=False, \ |
183 parser.add_argument('--difflib', action='store_true', default=False, \ |
144 help='force the use of difflib instead Levenshtein') |
184 help='force the use of difflib instead Levenshtein') |
145 parser.add_argument('--stats', action='store_true', default=False, \ |
185 parser.add_argument('--stats', action='store_true', default=False, \ |
146 help='generate stats summary instead of check for duplicates') |
186 help='generate stats summary instead of check for duplicates') |
|
187 parser.add_argument('--fixnames', action='store_true', default=False, \ |
|
188 help='scan for mkv and txt, fix broken filenames for windows') |
147 parser.add_argument('basedir', metavar='basedir', nargs='+', \ |
189 parser.add_argument('basedir', metavar='basedir', nargs='+', \ |
148 help='one or more base directories') |
190 help='one or more base directories') |
149 |
191 |
150 args = parser.parse_args() |
192 args = parser.parse_args() |
151 dupe = dupechecker() |
193 dupe = dupechecker() |