Loading imgslipstream.py +25 −2 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ if __name__ == '__main__': p.add_argument('--files', '-f', action='store_true', help='Make local files instead of embedding.') p.add_argument('--maxlen', '-m', action='store', help='Change the max length of URLs allowed (default 100).', default=100, type=int) p.add_argument('--url', '-u', action='store', help='Provide a relative URL where files can be downloaded from.') args = p.parse_args() log.info('Arguments: %s', args) Loading @@ -43,6 +44,20 @@ if __name__ == '__main__': console_handler.setFormatter(logging.Formatter('[%(asctime)s] %(message)s')) log.addHandler(console_handler) if args.url is not None: if args.url[:4] != 'http': log.error('Unsupported protocol for URL argument.') raise argparse.ArgumentError if '://' not in args.url: log.error('URL argument is not a valid URL.') raise argparse.ArgumentError if '.' not in args.url.split('/')[2]: log.error('Invalid domain for URL argument.') raise argparse.ArgumentError if args.url[-1:] != '/': log.debug('Adding ending slash to URL argument.') args.url = args.url + '/' dest = pathlib.Path(args.source).with_suffix('.slipstream' + str(pathlib.Path(args.source).suffix)) log.info('Destination: %s', dest) dest_folder = dest.with_suffix('').with_stem(dest.stem + '_files') Loading Loading @@ -114,7 +129,8 @@ if __name__ == '__main__': ) ) else: src_end_quote = document.find('"', src_quote + 6, img_end_tag) src_quote = src_quote + 5 src_end_quote = document.find('"', src_quote + 1, img_end_tag) src_quote_type = '"' log.debug( 'using alternate (") src_end_quote = {0}, src_quote_type = {1}'.format( Loading @@ -137,6 +153,13 @@ if __name__ == '__main__': if url[:4] != 'http': if url[:len(dest_folder.name)] == dest_folder.name or url[:11] == 'data:image/': log.debug('Quietly ignore already replaced tags.') continue elif args.url is not None: if url[:1] == '/': url = args.url[:args.url.find('/', 8)] + url else: url = args.url + url log.debug('Attemping relative url retrieval with url: {0}'.format(url)) else: log.warning('src attribute not http: {0}'.format(document[img_tag - 4:img_end_tag + 40])) continue Loading Loading
imgslipstream.py +25 −2 Original line number Diff line number Diff line Loading @@ -32,6 +32,7 @@ if __name__ == '__main__': p.add_argument('--files', '-f', action='store_true', help='Make local files instead of embedding.') p.add_argument('--maxlen', '-m', action='store', help='Change the max length of URLs allowed (default 100).', default=100, type=int) p.add_argument('--url', '-u', action='store', help='Provide a relative URL where files can be downloaded from.') args = p.parse_args() log.info('Arguments: %s', args) Loading @@ -43,6 +44,20 @@ if __name__ == '__main__': console_handler.setFormatter(logging.Formatter('[%(asctime)s] %(message)s')) log.addHandler(console_handler) if args.url is not None: if args.url[:4] != 'http': log.error('Unsupported protocol for URL argument.') raise argparse.ArgumentError if '://' not in args.url: log.error('URL argument is not a valid URL.') raise argparse.ArgumentError if '.' not in args.url.split('/')[2]: log.error('Invalid domain for URL argument.') raise argparse.ArgumentError if args.url[-1:] != '/': log.debug('Adding ending slash to URL argument.') args.url = args.url + '/' dest = pathlib.Path(args.source).with_suffix('.slipstream' + str(pathlib.Path(args.source).suffix)) log.info('Destination: %s', dest) dest_folder = dest.with_suffix('').with_stem(dest.stem + '_files') Loading Loading @@ -114,7 +129,8 @@ if __name__ == '__main__': ) ) else: src_end_quote = document.find('"', src_quote + 6, img_end_tag) src_quote = src_quote + 5 src_end_quote = document.find('"', src_quote + 1, img_end_tag) src_quote_type = '"' log.debug( 'using alternate (") src_end_quote = {0}, src_quote_type = {1}'.format( Loading @@ -137,6 +153,13 @@ if __name__ == '__main__': if url[:4] != 'http': if url[:len(dest_folder.name)] == dest_folder.name or url[:11] == 'data:image/': log.debug('Quietly ignore already replaced tags.') continue elif args.url is not None: if url[:1] == '/': url = args.url[:args.url.find('/', 8)] + url else: url = args.url + url log.debug('Attemping relative url retrieval with url: {0}'.format(url)) else: log.warning('src attribute not http: {0}'.format(document[img_tag - 4:img_end_tag + 40])) continue Loading