added support for remote ssh dupe checking against local basedir

2019-02-13

author
mdd
date
Wed, 13 Feb 2019 14:10:55 +0100 (2019-02-13)
changeset 36
a1ad6f4728be
parent 35
14c966c10648
child 37
5be334b71b08

added support for remote ssh dupe checking against local basedir

.hgignore file | annotate | diff | comparison | revisions
config-dist.py file | annotate | diff | comparison | revisions
dupecheck.py file | annotate | diff | comparison | revisions
--- a/.hgignore	Thu Oct 04 02:06:57 2018 +0200
+++ b/.hgignore	Wed Feb 13 14:10:55 2019 +0100
@@ -1,5 +1,7 @@
 syntax: glob
 
+config.py
+
 *.pyc
 eit.old/*
 testfiles/*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/config-dist.py	Wed Feb 13 14:10:55 2019 +0100
@@ -0,0 +1,10 @@
+REMOTE_HOSTS = [
+    {
+        'host': 'dm820',
+        'port': 22,
+        'user': 'root',
+        'pass': 'password',
+        'key': None,
+        'basedir': '/media/hdd/movie'
+    },
+]
\ No newline at end of file
--- a/dupecheck.py	Thu Oct 04 02:06:57 2018 +0200
+++ b/dupecheck.py	Wed Feb 13 14:10:55 2019 +0100
@@ -3,7 +3,7 @@
 """
 Toolkit / executable to scan for duplicate filenames in movie database
 
-2017 by mdd
+2017-2019 by mdd
 """
 
 #pylint: disable=line-too-long
@@ -11,6 +11,7 @@
 
 from __future__ import print_function
 import os, sys, re
+import time
 
 RE_PARENTHESES = re.compile("[\(\[].*?[\)\]]")
 
@@ -46,12 +47,40 @@
         self.duplicates = {}
         self.ratio = 0.85
         self.ignore_fileprefix = []
+        self.ssh = None
+        self.ssh_data = None
 
 
     def reset(self):
         self.filelist = []
         self.duplicates = {}
 
+    def __scandir_files(self, root, files, extra=()):
+        """
+        Derive a normalized title for every movie file in `files` and
+        append [title, filename, root, ext] to self.filelist.
+        `extra` holds additional lowercase extensions (with dot) to accept.
+        """
+        for filename in files:
+            stem, ext = os.path.splitext(filename)
+            ext = ext.lower()
+            if ext == ".ts":
+                # with " - " separators the title is what follows the
+                # first two fields; otherwise the whole name is the title
+                parts = filename.split(" - ")
+                title = parts[0] if len(parts) == 1 else " - ".join(parts[2:])
+                title = title[:-3]
+            elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4'] or ext in extra:
+                # use the real stem: a fixed [:-4] left "name." for .mpeg
+                # and mis-cut extras whose suffix is not 4 characters long
+                title = stem
+            else:
+                continue
+            # remove parentheses with contents in title
+            title = RE_PARENTHESES.sub("", title.lower())
+            self.filelist.append([title, filename, root, ext])
+
+
     def scandir(self, basedir, extra=[]):
         """
         Scan a base directory for movie files and add them to
@@ -60,29 +89,78 @@
         self.basedir = basedir
         print("Scanning directory: %s" % basedir)
         for root, subdirs, files in os.walk(basedir):
-            for filename in files:
-                ext = os.path.splitext(filename)[1].lower()
-                if ext == ".ts":
-                    #file_path = os.path.join(root, filename)
-                    title = filename.split(" - ")
-                    if len(title) == 1:
-                        title = title[0]
-                    else:
-                        title = " - ".join(title[2:])
-                    title = title[:-3].lower()
+            self.__scandir_files(root, files, extra)
+        # print(repr(self.filelist))
+        # sys.exit()
+
+    def scandir_remote(self, extra=()):
+        """
+        connect to remote ssh servers and get file lists for duplicate check
+
+        Every entry of REMOTE_HOSTS (config.py) is listed for *.ts files in
+        its 'basedir'; results go to self.filelist, rooted at "host: basedir".
+        Exits with status 1 when config.py or Paramiko cannot be imported.
+        """
+        print("getting filelist from remote hosts...")
+        try:
+            from config import REMOTE_HOSTS
+        except ImportError:
+            print("Please configure REMOTE_HOSTS in config.py!")
+            sys.exit(1)
+        try:
+            import paramiko
+        except ImportError:
+            print("Please install Paramiko!")
+            sys.exit(1)
+        self.ssh = paramiko.SSHClient()
+        # NOTE(review): AutoAddPolicy trusts unknown host keys without asking
+        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        for host in REMOTE_HOSTS:
+            self.ssh_data = host
+            root = "%s: %s" % (host['host'], host['basedir'])
+            try:
+                lines = self.__ssh_exec('cd %s; ls -1 *.ts' % host['basedir'])[0]
+                names = [line.strip().encode('ascii', 'ignore') for line in lines]
+                self.__scandir_files(root, names, extra)
+            finally:
+                self.__ssh_disconnect()
 
-                    # remove parentheses with contents in title
-                    title = RE_PARENTHESES.sub("", title)
+    def __ssh_exec(self, command):
+        """
+        establish ssh connection and execute command
+        the connection remains open for following commands until ssh_disconnect is called
+        """
+        if self.ssh is None:
+            return None
+        try:
+            transport = self.ssh.get_transport()
+            if not transport or not transport.is_active():
+                print("SSH: connecting to %s" % self.ssh_data['host'])
+                self.ssh.connect(self.ssh_data['host'], self.ssh_data['port'], self.ssh_data['user'], self.ssh_data['pass'], self.ssh_data['key'])
+
+            # Send the command (non-blocking)
+            stdin, stdout, stderr = self.ssh.exec_command(command)
 
-                    self.filelist.append([title, filename, root, ext])
-                elif ext in ['.mkv', '.avi', '.mpg', '.mpeg', '.mp4']:
-                    title = filename[:-4].lower()
-                    title = RE_PARENTHESES.sub("", title)
-                    self.filelist.append([title, filename, root, ext])
-                elif ext in extra:
-                    title = filename[:-4].lower()
-                    title = RE_PARENTHESES.sub("", title)
-                    self.filelist.append([title, filename, root, ext])
+            # Wait for the command to terminate
+            while not stdout.channel.exit_status_ready() and not stdout.channel.recv_ready():
+                time.sleep(1)
+
+            stdoutstring = stdout.readlines()
+            stderrstring = stderr.readlines()
+            return stdoutstring, stderrstring
+        finally:
+            pass
+
+    def __ssh_disconnect(self):
+        """
+        close the ssh connection if one is currently open
+        """
+        if self.ssh is not None:
+            # a live transport is the sign of an open link; close only then
+            transport = self.ssh.get_transport()
+            if transport is not None and transport.is_active():
+                print("SSH: disconnecting")
+                self.ssh.close()
 
     def fixnames(self):
         """
@@ -204,6 +282,8 @@
         help='force the use of difflib instead Levenshtein')
     parser.add_argument('--stats', action='store_true', default=False, \
         help='generate stats summary instead of check for duplicates')
+    parser.add_argument('--remote', action='store_true', default=False, \
+        help='Connect to ssh remotes, eg. dupecheck for dreambox local storage')
     parser.add_argument('--fixnames', action='store_true', default=False, \
         help='scan for mkv and txt, fix broken filenames for windows')
     parser.add_argument('basedir', metavar='basedir', nargs='+', \
@@ -238,6 +318,9 @@
             dupe.filelist = []
         sys.exit(0)
 
+    if args.remote:
+        dupe.scandir_remote()
+
     for srcstr in args.basedir:
         dupe.scandir(srcstr)
 

mercurial