dupecheck.py

changeset 39
6f8ece8a6aee
parent 38
ef9db9d5103c
equal deleted inserted replaced
38:ef9db9d5103c 39:6f8ece8a6aee
260 continue 260 continue
261 if self.filelist[idx][0].startswith(tst): 261 if self.filelist[idx][0].startswith(tst):
262 del self.filelist[idx] 262 del self.filelist[idx]
263 break 263 break
264 listlen = len(self.filelist) 264 listlen = len(self.filelist)
265 print("%i files left to analyze after revoving duplicates" % (
266 listlen))
267
265 268
266 for idx in range(listlen): 269 for idx in range(listlen):
267 if not self.filelist[idx]: 270 if not self.filelist[idx]:
268 continue 271 continue
269 print("\r%d %s\033[K" % ( 272 print("\r%d %s\033[K" % (
270 idx, self.filelist[idx][0]), end='') 273 idx, self.filelist[idx][0]), end='')
271 sys.stdout.flush() 274 sys.stdout.flush()
272 for idx2 in range(idx + 1, listlen): 275 for idx2 in range(idx + 1, listlen):
273 if self.filelist[idx2]: 276 if self.filelist[idx2]:
277 if not self.filelist[idx2]:
278 continue
274 if similarity(self.filelist[idx][0], self.filelist[idx2][0]) > self.ratio: 279 if similarity(self.filelist[idx][0], self.filelist[idx2][0]) > self.ratio:
275 #print "possible duplicate %d %s" % (idx2, item2[0])
276 key = os.path.join(self.filelist[idx][2], self.filelist[idx][1]) 280 key = os.path.join(self.filelist[idx][2], self.filelist[idx][1])
277 if not key in self.duplicates: 281 if not key in self.duplicates:
278 self.duplicates[key] = [] 282 self.duplicates[key] = []
279 self.duplicates[key].append( 283 self.duplicates[key].append(
280 os.path.join( 284 os.path.join(
334 print("Consider 'pip install python-Levenshtein' for faster analyze") 338 print("Consider 'pip install python-Levenshtein' for faster analyze")
335 339
336 if os.path.isfile("dupecheck-ignore.txt"): 340 if os.path.isfile("dupecheck-ignore.txt"):
337 # read the entire file line by line into buffer 341 # read the entire file line by line into buffer
338 print("Loading ignore filename prefixes file for dupe checking...") 342 print("Loading ignore filename prefixes file for dupe checking...")
339 dupe.ignore_fileprefix = [line.rstrip('\n').rstrip('\r') for line in open("dupecheck-ignore.txt", "rb")] 343 dupe.ignore_fileprefix = [line.lower().rstrip('\n').rstrip('\r') for line in open("dupecheck-ignore.txt", "rb")]
344 #print(dupe.ignore_fileprefix)
340 345
341 if args.fixnames: 346 if args.fixnames:
342 for srcstr in args.basedir: 347 for srcstr in args.basedir:
343 dupe.scandir(srcstr, ['.txt', '.nfo']) 348 dupe.scandir(srcstr, ['.txt', '.nfo'])
344 if len(dupe.filelist) > 0: 349 if len(dupe.filelist) > 0:

mercurial