8 |
8 |
9 #pylint: disable=line-too-long |
9 #pylint: disable=line-too-long |
10 #pylint: disable=invalid-name |
10 #pylint: disable=invalid-name |
11 |
11 |
12 from __future__ import print_function |
12 from __future__ import print_function |
13 import os, sys |
13 import os, sys, re |
|
14 |
|
# Matches one parenthesised or bracketed group including its contents,
# e.g. "(2010)" or "[hd]". The lazy ".*?" stops at the first closing
# bracket, so several groups in one title are removed separately.
# Written as a raw string because "\(" and "\[" are invalid escape
# sequences in a normal string literal.
RE_PARENTHESES = re.compile(r"[\(\[].*?[\)\]]")
14 |
16 |
15 def similarity(a, b): |
17 def similarity(a, b): |
16 if DIFFLIB: |
18 if DIFFLIB: |
17 return difflib.SequenceMatcher(a=a, b=b).ratio() |
19 return difflib.SequenceMatcher(a=a, b=b).ratio() |
18 else: |
20 else: |
65 if len(title) == 1: |
68 if len(title) == 1: |
66 title = title[0] |
69 title = title[0] |
67 else: |
70 else: |
68 title = " - ".join(title[2:]) |
71 title = " - ".join(title[2:]) |
69 title = title[:-3].lower() |
72 title = title[:-3].lower() |
|
73 |
|
74 # remove parentheses with contents in title |
|
75 title = RE_PARENTHESES.sub("", title) |
|
76 |
70 self.filelist.append([title, filename, root, ext]) |
77 self.filelist.append([title, filename, root, ext]) |
71 elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']: |
78 elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']: |
72 title = filename[:-4].lower() |
79 title = filename[:-4].lower() |
|
80 title = RE_PARENTHESES.sub("", title) |
73 self.filelist.append([title, filename, root, ext]) |
81 self.filelist.append([title, filename, root, ext]) |
74 elif ext in extra: |
82 elif ext in extra: |
75 title = filename[:-4].lower() |
83 title = filename[:-4].lower() |
|
84 title = RE_PARENTHESES.sub("", title) |
76 self.filelist.append([title, filename, root, ext]) |
85 self.filelist.append([title, filename, root, ext]) |
77 |
86 |
78 def fixnames(self): |
87 def fixnames(self): |
79 """ |
88 """ |
80 Search for defect filenames and remove illegal characters |
89 Search for defect filenames and remove illegal characters |
130 |
139 |
131 def analyze(self): |
140 def analyze(self): |
132 """ |
141 """ |
133 Analyze the scanlist for duplicates |
142 Analyze the scanlist for duplicates |
134 """ |
143 """ |
|
144 listlen = len(self.filelist) |
135 print("%i files to analyze, running duplicate testing loop..." % ( |
145 print("%i files to analyze, running duplicate testing loop..." % ( |
136 len(self.filelist))) |
146 listlen)) |
137 |
147 |
138 listlen = len(self.filelist) |
148 # remove potentially unwanted entries from the list |
|
149 if len(self.ignore_fileprefix) > 0: |
|
150 for idx in reversed(range(listlen)): |
|
151 for tst in self.ignore_fileprefix: |
|
152 if tst == '': |
|
153 continue |
|
154 if self.filelist[idx][0].startswith(tst): |
|
155 del self.filelist[idx] |
|
156 break |
|
157 listlen = len(self.filelist) |
|
158 |
139 for idx in range(listlen): |
159 for idx in range(listlen): |
140 if not self.filelist[idx]: |
160 if not self.filelist[idx]: |
141 continue |
161 continue |
142 print("\r%d %s\033[K" % ( |
162 print("\r%d %s\033[K" % ( |
143 idx, self.filelist[idx][0]), end='') |
163 idx, self.filelist[idx][0]), end='') |
202 except ImportError: |
222 except ImportError: |
203 import difflib |
223 import difflib |
204 DIFFLIB = True |
224 DIFFLIB = True |
205 print("Consider 'pip install python-Levenshtein' for faster analyze") |
225 print("Consider 'pip install python-Levenshtein' for faster analyze") |
206 |
226 |
|
if os.path.isfile("dupecheck-ignore.txt"):
    # Optional file with one title prefix per line; analyze() removes every
    # scanned entry whose title starts with one of these prefixes.
    print("Loading ignore filename prefixes file for dupe checking...")
    # Open in text mode: a file opened with "rb" yields bytes on Python 3,
    # so rstrip('\n') would raise TypeError there. The with-block also
    # closes the handle.
    with open("dupecheck-ignore.txt", "r") as ignore_file:
        dupe.ignore_fileprefix = [
            line.rstrip('\n').rstrip('\r') for line in ignore_file]
207 |
231 |
208 if args.fixnames: |
232 if args.fixnames: |
209 for srcstr in args.basedir: |
233 for srcstr in args.basedir: |
210 dupe.scandir(srcstr, ['.txt']) |
234 dupe.scandir(srcstr, ['.txt']) |
211 if len(dupe.filelist) > 0: |
235 if len(dupe.filelist) > 0: |