Sat, 04 Nov 2017 22:34:12 +0100
added filename duplicate checker
#!/usr/bin/env python
"""Report recordings whose file names suggest they are duplicates.

The script walks a directory tree, derives a lower-cased title from every
".ts" and ".mkv" file name, compares all titles pairwise and prints each
file followed by the files whose title is very similar to it.

Usage: run without arguments to scan BASEDIR, or pass the directory to scan
as the first command-line argument.
"""
import difflib
import os
import sys

# Directory scanned when no path is given on the command line.
BASEDIR = "../DREAMBOX"
# Filled by main(): one (title, filename, directory) entry per recording.
FILELIST = []
# Filled by main(): path of a recording -> paths of its likely duplicates.
DUPLICATES = {}

# Separator between the fields of a ".ts" file name.
FIELD_SEPARATOR = " - "
# Number of leading fields dropped from a ".ts" name to obtain the title
# (presumably timestamp and channel of the recording -- TODO confirm).
LEADING_FIELDS = 2


def similarity(seq1, seq2):
    """Return the difflib similarity ratio (0.0 .. 1.0) of two sequences.

    The comparison is case-sensitive; callers in this file pass titles that
    are already lower-cased.
    """
    return difflib.SequenceMatcher(a=seq1, b=seq2).ratio()


def extract_title(filename):
    """Return the lower-cased title encoded in *filename*.

    ".ts" names drop their first LEADING_FIELDS fields; ".mkv" names are
    used whole. The extension is removed in both cases.

    Returns None when the file is neither ".ts" nor ".mkv", or when nothing
    is left to compare.
    """
    if filename.endswith(".ts"):
        stem = filename[:-len(".ts")]
        parts = stem.split(FIELD_SEPARATOR)
        # With too few fields (or an empty title field) the join yields "".
        # Empty titles compare as identical to each other, which would flag
        # unrelated files as duplicates, so fall back to the whole name.
        title = FIELD_SEPARATOR.join(parts[LEADING_FIELDS:]) or stem
    elif filename.endswith(".mkv"):
        title = filename[:-len(".mkv")]
    else:
        return None
    return title.lower() or None


def collect_files(basedir):
    """Walk *basedir* and return (title, filename, directory) tuples.

    Only files for which extract_title() yields a title are included.
    """
    found = []
    for root, _subdirs, files in os.walk(basedir):
        for filename in files:
            title = extract_title(filename)
            if title is not None:
                found.append((title, filename, root))
    return found


def find_duplicates(filelist, threshold=0.85):
    """Map each file path to the paths of later entries with a similar title.

    *filelist* holds (title, filename, directory) entries. Every entry is
    compared with the entries after it; a pair counts as a possible
    duplicate when the similarity of the titles is greater than *threshold*.
    Entries without a match do not appear in the result.
    """
    duplicates = {}
    for idx, (title, filename, root) in enumerate(filelist):
        for other_title, other_filename, other_root in filelist[idx + 1:]:
            if similarity(title, other_title) > threshold:
                key = os.path.join(root, filename)
                duplicates.setdefault(key, []).append(
                    os.path.join(other_root, other_filename))
    return duplicates


def main(argv=None):
    """Scan the base directory and print every group of likely duplicates.

    *argv* is the argument list without the program name; it defaults to
    sys.argv[1:]. Its first element, if present, replaces BASEDIR.
    """
    args = sys.argv[1:] if argv is None else argv
    basedir = args[0] if args else BASEDIR

    # Reset the module-level containers so repeated calls do not accumulate.
    del FILELIST[:]
    DUPLICATES.clear()
    FILELIST.extend(collect_files(basedir))
    DUPLICATES.update(find_duplicates(FILELIST))

    # Sorted so the report order does not depend on dict ordering.
    for base in sorted(DUPLICATES):
        # Single-argument print calls behave the same on Python 2 and 3.
        print(base)
        for dup in DUPLICATES[base]:
            print(dup)
        print("")


if __name__ == "__main__":
    main()