260 continue |
260 continue |
261 if self.filelist[idx][0].startswith(tst): |
261 if self.filelist[idx][0].startswith(tst): |
262 del self.filelist[idx] |
262 del self.filelist[idx] |
263 break |
263 break |
264 listlen = len(self.filelist) |
264 listlen = len(self.filelist) |
|
265 print("%i files left to analyze after revoving duplicates" % ( |
|
266 listlen)) |
|
267 |
265 |
268 |
266 for idx in range(listlen): |
269 for idx in range(listlen): |
267 if not self.filelist[idx]: |
270 if not self.filelist[idx]: |
268 continue |
271 continue |
269 print("\r%d %s\033[K" % ( |
272 print("\r%d %s\033[K" % ( |
270 idx, self.filelist[idx][0]), end='') |
273 idx, self.filelist[idx][0]), end='') |
271 sys.stdout.flush() |
274 sys.stdout.flush() |
272 for idx2 in range(idx + 1, listlen): |
275 for idx2 in range(idx + 1, listlen): |
273 if self.filelist[idx2]: |
276 if self.filelist[idx2]: |
|
277 if not self.filelist[idx2]: |
|
278 continue |
274 if similarity(self.filelist[idx][0], self.filelist[idx2][0]) > self.ratio: |
279 if similarity(self.filelist[idx][0], self.filelist[idx2][0]) > self.ratio: |
275 #print "possible duplicate %d %s" % (idx2, item2[0]) |
|
276 key = os.path.join(self.filelist[idx][2], self.filelist[idx][1]) |
280 key = os.path.join(self.filelist[idx][2], self.filelist[idx][1]) |
277 if not key in self.duplicates: |
281 if not key in self.duplicates: |
278 self.duplicates[key] = [] |
282 self.duplicates[key] = [] |
279 self.duplicates[key].append( |
283 self.duplicates[key].append( |
280 os.path.join( |
284 os.path.join( |
334 print("Consider 'pip install python-Levenshtein' for faster analyze") |
338 print("Consider 'pip install python-Levenshtein' for faster analyze") |
335 |
339 |
336 if os.path.isfile("dupecheck-ignore.txt"): |
340 if os.path.isfile("dupecheck-ignore.txt"): |
337 # read the entire file line by line into buffer |
341 # read the entire file line by line into buffer |
338 print("Loading ignore filename prefixes file for dupe checking...") |
342 print("Loading ignore filename prefixes file for dupe checking...") |
339 dupe.ignore_fileprefix = [line.rstrip('\n').rstrip('\r') for line in open("dupecheck-ignore.txt", "rb")] |
343 dupe.ignore_fileprefix = [line.lower().rstrip('\n').rstrip('\r') for line in open("dupecheck-ignore.txt", "rb")] |
|
344 #print(dupe.ignore_fileprefix) |
340 |
345 |
341 if args.fixnames: |
346 if args.fixnames: |
342 for srcstr in args.basedir: |
347 for srcstr in args.basedir: |
343 dupe.scandir(srcstr, ['.txt', '.nfo']) |
348 dupe.scandir(srcstr, ['.txt', '.nfo']) |
344 if len(dupe.filelist) > 0: |
349 if len(dupe.filelist) > 0: |