Loading imgslipstream.py +37 −11 Original line number Diff line number Diff line Loading @@ -30,7 +30,8 @@ if __name__ == '__main__': p.add_argument('source') p.add_argument('--verbose', '-v', action='store_true', help='Print more logging to the console.') p.add_argument('--files', '-f', action='store_true', help='Make local files instead of embedding.') p.add_argument('--maxlen', '-m', action='store', help='Change the max length of URLs allowed (default 100).', default=100, type=int) p.add_argument('--maxlen', '-m', action='store', help='Change the max length of URLs allowed (default 100).', default=100, type=int) args = p.parse_args() log.info('Arguments: %s', args) Loading Loading @@ -75,14 +76,18 @@ if __name__ == '__main__': elif document[img_tag - 4:img_tag] == '<': img_end_tag = document.find('>', img_tag) log.debug('img_end_tag = {0}'.format(img_end_tag)) elif document[img_tag:img_tag + 5] == 'img {' or document[img_tag - 1:img_tag + 4] == ',img,': log.debug('Quietly skipping a CSS tag.') elif document[img_tag:img_tag + 5] == 'img {' or document[img_tag - 1:img_tag + 4] == ',img,' \ or document[img_tag - 1:img_tag + 4] == '|img|': log.debug('Quietly skipping a CSS or JS reference to an img tag.') continue elif document[img_tag - 2:img_tag + 9] == 'i.imgur.com' or document[img_tag - 6:img_tag + 6] == 'i.postimg.cc': elif document[img_tag - 2:img_tag + 9] == 'i.imgur.com' \ or document[img_tag - 6:img_tag + 6] == 'i.postimg.cc': log.debug('Quietly skipping a domain with img in it (not an img tag).') continue else: log.warning('img keyword found without < or <, skipped: {0}'.format(document[img_tag - 4:img_tag + 40])) log.warning( 'img keyword found without < or <, skipped: {0}'.format(document[img_tag - 4:img_tag + 40]) ) continue src_attr = document.find('src', img_tag, img_end_tag) Loading @@ -103,15 +108,27 @@ if __name__ == '__main__': log.debug('using last resort (src + 3) src_quote = {0}'.format(src_quote)) src_end_quote = document.find(' ', src_quote + 1) src_quote_type = '' log.debug('using alternate ( ) src_end_quote = {0}, src_quote_type = {1}'.format(src_end_quote, src_quote_type)) log.debug( 'using alternate ( ) src_end_quote = {0}, src_quote_type = {1}'.format( src_end_quote, src_quote_type ) ) else: src_end_quote = document.find('"', src_quote + 6, img_end_tag) src_quote_type = '"' log.debug('using alternate (") src_end_quote = {0}, src_quote_type = {1}'.format(src_end_quote, src_quote_type)) log.debug( 'using alternate (") src_end_quote = {0}, src_quote_type = {1}'.format( src_end_quote, src_quote_type ) ) else: src_end_quote = document.find('"', src_quote + 1, img_end_tag) src_quote_type = '"' log.debug('using main (") src_end_quote = {0}, src_quote_type = {1}'.format(src_end_quote, src_quote_type)) log.debug( 'using main (") src_end_quote = {0}, src_quote_type = {1}'.format( src_end_quote, src_quote_type ) ) url = document[src_quote + 1:src_end_quote].strip() log.debug('url = {0}'.format(url)) if len(url) < 5: Loading @@ -124,7 +141,11 @@ if __name__ == '__main__': log.warning('src attribute not http: {0}'.format(document[img_tag - 4:img_end_tag + 40])) continue if len(url) > args.maxlen: log.warning('src attribute too long (use -m if required): {0}'.format(document[img_tag - 4:img_tag + args.maxlen])) log.warning( 'src attribute too long (use -m if required): {0}'.format( document[img_tag - 4:img_tag + args.maxlen] ) ) continue if '.' not in url: log.warning('Invalid URL, no dots (.) found: {0}'.format(document[img_tag - 4:img_end_tag + 40])) Loading @@ -139,7 +160,11 @@ if __name__ == '__main__': req = requests.get(url) log.debug('req = {0} {1}'.format(req.status_code, req.reason)) if req.status_code != 200: log.warning('{1} {2}: {0}'.format(document[img_tag - 4:img_end_tag + 40], req.status_code, req.reason)) log.warning( '{1} {2}: {0}'.format( document[img_tag - 4:img_end_tag + 40], req.status_code, req.reason ) ) continue orig = document[src_attr:src_end_quote + len(src_quote_type)] count = document.count(orig) Loading @@ -151,7 +176,8 @@ if __name__ == '__main__': replacement = 'src=' + src_quote_type + dest_folder.name + '/' + dest_file.name + src_quote_type log.debug('replacement = {0}'.format(replacement)) else: replacement = 'src=' + src_quote_type + 'data:image/' + file_type + ';base64,' + base64.b64encode(req.content).decode() + src_quote_type replacement = 'src=' + src_quote_type + 'data:image/' + file_type + ';base64,' + \ base64.b64encode(req.content).decode() + src_quote_type log.debug('replacement[:40]...[-10:] = {0}...{1}'.format(replacement[:40], replacement[-10:])) document = document.replace(orig, replacement) log.info('Replaced {0} instances of: {1}'.format(count, orig)) Loading Loading
imgslipstream.py +37 −11 Original line number Diff line number Diff line Loading @@ -30,7 +30,8 @@ if __name__ == '__main__': p.add_argument('source') p.add_argument('--verbose', '-v', action='store_true', help='Print more logging to the console.') p.add_argument('--files', '-f', action='store_true', help='Make local files instead of embedding.') p.add_argument('--maxlen', '-m', action='store', help='Change the max length of URLs allowed (default 100).', default=100, type=int) p.add_argument('--maxlen', '-m', action='store', help='Change the max length of URLs allowed (default 100).', default=100, type=int) args = p.parse_args() log.info('Arguments: %s', args) Loading Loading @@ -75,14 +76,18 @@ if __name__ == '__main__': elif document[img_tag - 4:img_tag] == '<': img_end_tag = document.find('>', img_tag) log.debug('img_end_tag = {0}'.format(img_end_tag)) elif document[img_tag:img_tag + 5] == 'img {' or document[img_tag - 1:img_tag + 4] == ',img,': log.debug('Quietly skipping a CSS tag.') elif document[img_tag:img_tag + 5] == 'img {' or document[img_tag - 1:img_tag + 4] == ',img,' \ or document[img_tag - 1:img_tag + 4] == '|img|': log.debug('Quietly skipping a CSS or JS reference to an img tag.') continue elif document[img_tag - 2:img_tag + 9] == 'i.imgur.com' or document[img_tag - 6:img_tag + 6] == 'i.postimg.cc': elif document[img_tag - 2:img_tag + 9] == 'i.imgur.com' \ or document[img_tag - 6:img_tag + 6] == 'i.postimg.cc': log.debug('Quietly skipping a domain with img in it (not an img tag).') continue else: log.warning('img keyword found without < or <, skipped: {0}'.format(document[img_tag - 4:img_tag + 40])) log.warning( 'img keyword found without < or <, skipped: {0}'.format(document[img_tag - 4:img_tag + 40]) ) continue src_attr = document.find('src', img_tag, img_end_tag) Loading @@ -103,15 +108,27 @@ if __name__ == '__main__': log.debug('using last resort (src + 3) src_quote = {0}'.format(src_quote)) src_end_quote = document.find(' ', src_quote + 1) src_quote_type = '' log.debug('using alternate ( ) src_end_quote = {0}, src_quote_type = {1}'.format(src_end_quote, src_quote_type)) log.debug( 'using alternate ( ) src_end_quote = {0}, src_quote_type = {1}'.format( src_end_quote, src_quote_type ) ) else: src_end_quote = document.find('"', src_quote + 6, img_end_tag) src_quote_type = '"' log.debug('using alternate (") src_end_quote = {0}, src_quote_type = {1}'.format(src_end_quote, src_quote_type)) log.debug( 'using alternate (") src_end_quote = {0}, src_quote_type = {1}'.format( src_end_quote, src_quote_type ) ) else: src_end_quote = document.find('"', src_quote + 1, img_end_tag) src_quote_type = '"' log.debug('using main (") src_end_quote = {0}, src_quote_type = {1}'.format(src_end_quote, src_quote_type)) log.debug( 'using main (") src_end_quote = {0}, src_quote_type = {1}'.format( src_end_quote, src_quote_type ) ) url = document[src_quote + 1:src_end_quote].strip() log.debug('url = {0}'.format(url)) if len(url) < 5: Loading @@ -124,7 +141,11 @@ if __name__ == '__main__': log.warning('src attribute not http: {0}'.format(document[img_tag - 4:img_end_tag + 40])) continue if len(url) > args.maxlen: log.warning('src attribute too long (use -m if required): {0}'.format(document[img_tag - 4:img_tag + args.maxlen])) log.warning( 'src attribute too long (use -m if required): {0}'.format( document[img_tag - 4:img_tag + args.maxlen] ) ) continue if '.' not in url: log.warning('Invalid URL, no dots (.) found: {0}'.format(document[img_tag - 4:img_end_tag + 40])) Loading @@ -139,7 +160,11 @@ if __name__ == '__main__': req = requests.get(url) log.debug('req = {0} {1}'.format(req.status_code, req.reason)) if req.status_code != 200: log.warning('{1} {2}: {0}'.format(document[img_tag - 4:img_end_tag + 40], req.status_code, req.reason)) log.warning( '{1} {2}: {0}'.format( document[img_tag - 4:img_end_tag + 40], req.status_code, req.reason ) ) continue orig = document[src_attr:src_end_quote + len(src_quote_type)] count = document.count(orig) Loading @@ -151,7 +176,8 @@ if __name__ == '__main__': replacement = 'src=' + src_quote_type + dest_folder.name + '/' + dest_file.name + src_quote_type log.debug('replacement = {0}'.format(replacement)) else: replacement = 'src=' + src_quote_type + 'data:image/' + file_type + ';base64,' + base64.b64encode(req.content).decode() + src_quote_type replacement = 'src=' + src_quote_type + 'data:image/' + file_type + ';base64,' + \ base64.b64encode(req.content).decode() + src_quote_type log.debug('replacement[:40]...[-10:] = {0}...{1}'.format(replacement[:40], replacement[-10:])) document = document.replace(orig, replacement) log.info('Replaced {0} instances of: {1}'.format(count, orig)) Loading