44 self.basedir = "" |
45 self.basedir = "" |
45 self.filelist = [] |
46 self.filelist = [] |
46 self.duplicates = {} |
47 self.duplicates = {} |
47 self.ratio = 0.85 |
48 self.ratio = 0.85 |
48 self.ignore_fileprefix = [] |
49 self.ignore_fileprefix = [] |
|
50 self.ssh = None |
|
51 self.ssh_data = None |
49 |
52 |
50 |
53 |
def reset(self):
    """
    Drop all scan results so the instance can be used for a new run.
    """
    # Bind fresh containers; the previously referenced objects are left untouched.
    self.filelist, self.duplicates = [], {}
54 |
57 |
|
def __scandir_files(self, root, files, extra=()):
    """
    Derive a normalized title for each movie file name in ``files`` and
    append ``[title, filename, root, ext]`` to ``self.filelist``.

    root  -- location label stored with every entry (it is only recorded
             here, never opened or joined with the file name)
    files -- iterable of bare file names
    extra -- additional lower-case extensions (including the leading dot)
             that should be accepted besides the built-in ones

    File names with any other extension are ignored.
    """
    movie_exts = ('.mkv', '.avi', '.mpg', '.mpeg', '.mp4')

    for filename in files:
        # splitext gives the real stem, so extensions that are not exactly
        # four characters long (".mpeg", user supplied extras) no longer
        # leave a trailing dot or cut into the title.
        stem, ext = os.path.splitext(filename)
        ext = ext.lower()

        if ext == ".ts":
            parts = stem.split(" - ")
            if len(parts) > 2:
                # Only the fields after the second " - " form the title
                # (presumably the leading two are timestamp and channel
                # of a recording -- verify against the naming scheme).
                title = " - ".join(parts[2:])
            else:
                # One or two fields: keep the whole stem. Previously a
                # name with exactly one separator produced an empty title.
                title = stem
        elif ext in movie_exts or ext in extra:
            title = stem
        else:
            continue

        # remove parentheses with contents in title
        title = RE_PARENTHESES.sub("", title.lower())
        self.filelist.append([title, filename, root, ext])
|
82 |
|
83 |
def scandir(self, basedir, extra=()):
    """
    Scan a base directory recursively for movie files and add them to
    the list that is analyzed for duplicates.

    basedir -- directory to walk; also remembered in ``self.basedir``
    extra   -- additional file extensions handed on to the per-directory
               file scan (immutable default, so no state is shared
               between calls)
    """
    self.basedir = basedir
    print("Scanning directory: %s" % basedir)
    # Sub-directory names are not needed; os.walk descends into them itself.
    for root, _subdirs, files in os.walk(basedir):
        self.__scandir_files(root, files, extra)
66 #file_path = os.path.join(root, filename) |
95 |
67 title = filename.split(" - ") |
def scandir_remote(self, extra=()):
    """
    Connect to the ssh hosts listed in ``config.REMOTE_HOSTS`` and add
    their ``*.ts`` files to the file list for the duplicate check.

    Every host entry is used as a mapping; this method reads its 'host'
    and 'basedir' keys and stores the entry in ``self.ssh_data`` for the
    ssh helper methods.

    extra -- accepted for symmetry with scandir(); currently unused
             because only ``*.ts`` names are requested from the remote.

    Exits the process with status 1 when config.REMOTE_HOSTS or the
    paramiko package cannot be imported.
    """
    print("getting filelist from remote hosts...")
    try:
        from config import REMOTE_HOSTS
    except ImportError:
        print("Please configure REMOTE_HOSTS in config.py!")
        sys.exit(1)
    try:
        import paramiko
    except ImportError:
        print("Please install Paramiko!")
        sys.exit(1)

    self.ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without any
    # verification -- confirm this is acceptable for the target network.
    self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    for host in REMOTE_HOSTS:
        self.ssh_data = host
        location = "%s: %s" % (host['host'], host['basedir'])
        try:
            # "&&" instead of ";": if the cd fails we must not list (and
            # later report) the files of the login directory instead.
            # NOTE(review): basedir is interpolated unquoted; paths with
            # spaces or shell metacharacters will not work.
            result = self.__ssh_exec('cd %s && ls -1 *.ts' % host['basedir'])
            lines = result[0] if result else []

            names = []
            for line in lines:
                name = line.strip()
                # Reduce to plain ASCII but keep a text string: a bytes
                # object would never match the str extensions compared
                # against during the file scan.
                if isinstance(name, bytes):
                    name = name.decode('ascii', 'ignore')
                else:
                    name = name.encode('ascii', 'ignore').decode('ascii')
                if name:
                    names.append(name)

            self.__scandir_files(location, names)
        finally:
            # Close per host so the next host gets its own connection,
            # also when the listing above raised.
            self.__ssh_disconnect()
|
127 |
|
def __ssh_exec(self, command):
    """
    Run ``command`` on the host described by ``self.ssh_data``.

    A connection is opened first when the client has no active transport;
    it stays open for following commands until the disconnect helper is
    called.

    Returns None when no ssh client has been created, otherwise a tuple
    ``(stdout_lines, stderr_lines)`` as returned by ``readlines()``.
    """
    client = self.ssh
    if client is None:
        return None

    link = client.get_transport()
    if not (link and link.is_active()):
        print("SSH: connecting to %s" % self.ssh_data['host'])
        client.connect(
            self.ssh_data['host'],
            self.ssh_data['port'],
            self.ssh_data['user'],
            self.ssh_data['pass'],
            self.ssh_data['key'],
        )

    # exec_command returns immediately; stdin is not needed here
    _stdin, out, err = client.exec_command(command)

    # Poll once per second until the command has finished or output is ready
    chan = out.channel
    while not (chan.exit_status_ready() or chan.recv_ready()):
        time.sleep(1)

    return out.readlines(), err.readlines()
|
153 |
|
def __ssh_disconnect(self):
    """
    Close the ssh client connection, if a client exists.

    Does nothing when ``self.ssh`` is None. The message is printed only
    when there is a live transport to tear down (the previous check was
    inverted and reported a disconnect only for inactive transports);
    close() itself is always called, which is harmless on a client that
    is not connected.
    """
    if self.ssh is None:
        return

    transport = self.ssh.get_transport()
    if transport and transport.is_active():
        print("SSH: disconnecting")
    # Close client connection.
    self.ssh.close()
86 |
164 |
87 def fixnames(self): |
165 def fixnames(self): |
88 """ |
166 """ |
89 Search for defect filenames and remove illegal characters |
167 Search for defect filenames and remove illegal characters |
90 """ |
168 """ |
202 help='filename duplicate threshold 0.1 < ratio 1.0 (default 0.85)') |
280 help='filename duplicate threshold 0.1 < ratio 1.0 (default 0.85)') |
203 parser.add_argument('--difflib', action='store_true', default=False, \ |
281 parser.add_argument('--difflib', action='store_true', default=False, \ |
204 help='force the use of difflib instead Levenshtein') |
282 help='force the use of difflib instead Levenshtein') |
205 parser.add_argument('--stats', action='store_true', default=False, \ |
283 parser.add_argument('--stats', action='store_true', default=False, \ |
206 help='generate stats summary instead of check for duplicates') |
284 help='generate stats summary instead of check for duplicates') |
|
285 parser.add_argument('--remote', action='store_true', default=False, \ |
|
286 help='Connect to ssh remotes, eg. dupecheck for dreambox local storage') |
207 parser.add_argument('--fixnames', action='store_true', default=False, \ |
287 parser.add_argument('--fixnames', action='store_true', default=False, \ |
208 help='scan for mkv and txt, fix broken filenames for windows') |
288 help='scan for mkv and txt, fix broken filenames for windows') |
209 parser.add_argument('basedir', metavar='basedir', nargs='+', \ |
289 parser.add_argument('basedir', metavar='basedir', nargs='+', \ |
210 help='one or more base directories') |
290 help='one or more base directories') |
211 |
291 |