dupecheck.py

changeset 37
5be334b71b08
parent 36
a1ad6f4728be
--- a/dupecheck.py	Wed Feb 13 14:10:55 2019 +0100
+++ b/dupecheck.py	Mon Mar 09 12:19:29 2020 +0100
@@ -2,8 +2,11 @@
 # -*- coding: utf-8 -*-
 """
 Toolkit / executable to scan for duplicate filenames in movie database
+More functions:
+ * sanitize filenames
+ * statistics
 
-2017-2019 by mdd
+2017-2020 by mdd
 """
 
 #pylint: disable=line-too-long
@@ -71,7 +74,7 @@
                 title = RE_PARENTHESES.sub("", title)
 
                 self.filelist.append([title, filename, root, ext])
-            elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']:
+            elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4', '.ogg', '.mp3', '.iso']:
                 title = filename[:-4].lower()
                 title = RE_PARENTHESES.sub("", title)
                 self.filelist.append([title, filename, root, ext])
@@ -168,19 +171,45 @@
         """
         import re
         for item in self.filelist:
-            if not item[3] in ['.mkv', '.txt']:
+            if not item[3] in ['.mkv', '.txt', '.nfo']:
                 continue
             # any non-alphanumeric characters in filename?
             cleanfn = replace_all(item[1], {
                     #'ä':'ae', 'Ä':'Ae',
                     #'ö':'oe', 'Ö':'Oe',
                     #'ü':'ue', 'Ü':'Ue',
-                    'ß':'ss',
+                    'ß': 'ss',
+                    ':': ' -',
                 })
-            cleanfn = re.sub(r'[^A-Za-z0-9\.\_\-\(\)\&öäüÖÄÜ\' ]', '-', cleanfn)
+            cleanfn = re.sub(r'[^A-Za-z0-9\.\_\-\(\)\[\]\{\}\&öäüÖÄÜ\' ]', '-', cleanfn)
+
+            # if cleanfn.endswith(".nfo"):
+            #     # force .txt ending of nfo files
+            #     # TODO: later there should be nfo files for kodi
+            #     cleanfn = cleanfn[:-4] + ".txt"
+
+            checklist = re.findall(r'\([^\(\)]+\)', cleanfn)
+            for nonyear in checklist:
+                if re.match(r'\(\d{4}\)', nonyear):
+                    continue
+                cleanfn = replace_all(cleanfn, {
+                    nonyear: replace_all(nonyear, {'(':'[', ')':']'})
+                    })
+                #print ("NONYEAR: ", nonyear)
+
+            checklist = re.findall(r'\[\d{4}[^\]]+\]', cleanfn)
+            for year in checklist:
+                cleanfn = replace_all(cleanfn, {
+                    year: replace_all(year, {
+                        year[:5]: '(' + year[1:5] + ') ['
+                        })
+                    })
+                # print ("YEAR: ", year)
+
+
             if item[1] == cleanfn:
                 continue
-            print (item[1])
+            print (item[1], " -> ", cleanfn)
             os.rename(
                 os.path.join(item[2], item[1]),
                 os.path.join(item[2], cleanfn)
@@ -311,7 +340,7 @@
 
     if args.fixnames:
         for srcstr in args.basedir:
-            dupe.scandir(srcstr, ['.txt'])
+            dupe.scandir(srcstr, ['.txt', '.nfo'])
         if len(dupe.filelist) > 0:
             print ("Checking %i file names..." % len(dupe.filelist))
             dupe.fixnames()

mercurial