#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import os
import re
import base64
import mimetypes
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
# XML namespace URI of the SVG root element. mergeSvg() assigns it and
# find_all_elements() reads it to build qualified "{uri}tag" names.
namespace = ""
def usage():
    """Print a short usage message naming this script to standard output."""
    script_name = os.path.basename(__file__)
    print(f"\nusage:\n{script_name} [filepath]\n")
def check_file_extension(f, check):
    """Tell whether path *f* ends with the extension *check*.

    The comparison ignores case; *check* is given without the leading dot.
    """
    suffix = os.path.splitext(f)[1].lower()
    wanted = "." + check.lower()
    return suffix == wanted
def isSvg(f):
    """Return True when path *f* carries an ``.svg`` extension (any case)."""
    svg_extension = "svg"
    return check_file_extension(f, svg_extension)
def isHtml(f):
    """Return True when path *f* carries an ``.html`` or ``.htm`` extension."""
    return any(check_file_extension(f, ext) for ext in ("html", "htm"))
def check_arg():
    """Return the first command-line argument if it names an existing file.

    Returns None when no argument was supplied or when it is not a regular
    file.
    """
    args = sys.argv[1:]
    if not args:
        return None
    candidate = args[0]
    return candidate if os.path.isfile(candidate) else None
def output_filename(f):
    """Build the output path by inserting ``.merged`` before the extension.

    For example ``page.html`` becomes ``page.merged.html``.
    """
    stem, suffix = os.path.splitext(f)
    return "".join((stem, ".merged", suffix))
def supported_image(mime):
    """Return True when *mime* is a raster image type that can be embedded.

    Accepted types are PNG, BMP, GIF and JPEG; anything else (including
    None) yields False.
    """
    embeddable = ('image/png', 'image/bmp', 'image/gif', 'image/jpeg')
    return mime in embeddable
def find_all_elements(node, tag):
    """Collect *node* and all of its descendants whose tag is *tag*.

    The tag is qualified with the module-level ``namespace`` URI. When that
    URI is empty the bare tag name is used, so documents without a default
    namespace are matched too.

    Args:
        node: ElementTree element at which the search starts (inclusive).
        tag: Local element name, without any namespace.

    Returns:
        A list of matching elements in document order.
    """
    # An empty namespace would otherwise produce "{}tag", which never
    # matches a real element.
    full_tag = f"{{{namespace}}}{tag}" if namespace else tag
    # Element.iter walks the subtree depth-first, starting with node itself.
    return list(node.iter(full_tag))
def get_svg_content(filepath):
    """Return the text of *filepath* starting at its opening ``<svg`` tag.

    Everything before the tag (XML declaration, DOCTYPE, comments) is
    dropped.

    Args:
        filepath: Path of the SVG file; it is read as UTF-8, the XML default,
            instead of the platform's locale encoding.

    Returns:
        The file content from ``<svg`` to the end of the file, or None when
        the file contains no ``<svg`` tag.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()
    # Accept "<svg>", "<svg/>", "<svg " and "<svg" followed by a newline; the
    # trailing character class keeps names such as "<svgfoo" from matching.
    match = re.search(r"<svg[\s/>]", content)
    if match is None:
        return None
    return content[match.start():]
def mergeHtml(target, output):
    """Embed the local images referenced by an HTML file into the document.

    Every ``<img>`` whose ``src`` names an existing local file is processed:
    SVG files are parsed and inserted in place of the ``<img>`` element,
    while PNG/BMP/GIF/JPEG files are rewritten as base64 ``data:`` URIs.
    Elements without ``src``, with a missing file or with an unsupported
    type are left untouched.

    Args:
        target: Path of the HTML file to read.
        output: Path the merged document is written to (UTF-8). It is only
            written when at least one image was embedded.

    Returns:
        True when the document was modified and written, otherwise False.
    """
    # Binary mode lets BeautifulSoup detect the document encoding itself
    # rather than decoding with the platform's locale encoding.
    with open(target, "rb") as f:
        soup = BeautifulSoup(f, 'html.parser')
    base_dir = os.path.dirname(os.path.abspath(target))
    changed = False
    for elm in soup.find_all('img'):
        src = elm.get('src')
        if not src:
            # <img> without a src attribute: nothing to embed.
            continue
        # Relative references are relative to the document, not the current
        # working directory; fall back to the raw value for compatibility.
        local = os.path.join(base_dir, src)
        image_file = local if os.path.isfile(local) else src
        if not os.path.isfile(image_file):
            continue
        mime_type, _ = mimetypes.guess_type(image_file)
        if mime_type == "image/svg+xml":
            with open(image_file, "rb") as f:
                svg = BeautifulSoup(f, 'html.parser')
            elm.replace_with(svg)
            changed = True
            continue
        if not supported_image(mime_type):
            continue
        with open(image_file, mode="rb") as f:
            encoded = base64.b64encode(f.read()).decode('ascii')
        elm['src'] = f"data:{mime_type};base64,{encoded}"
        changed = True
    if changed:
        with open(output, 'w', encoding='utf-8') as f:
            f.write(str(soup))
    return changed
def mergeSvg(target, output):
    """Embed the local raster images referenced by an SVG file.

    Every ``<image>`` element in the root element's namespace is inspected.
    Each of its ``href`` attributes (namespaced or not) that names an
    existing PNG/BMP/GIF/JPEG file is replaced by a base64 ``data:`` URI.

    Side effect: the module-level ``namespace`` is set to the root element's
    namespace URI, or to "" when the root element has no namespace.

    Args:
        target: Path of the SVG file to read.
        output: Path the merged document is written to (UTF-8). It is only
            written when at least one image was embedded.

    Returns:
        True when the document was modified and written, otherwise False.
    """
    global namespace
    tree = ET.parse(target)
    # Re-register the document's prefixes so the output keeps them instead
    # of auto-generated ones.
    declared = {
        prefix: uri
        for _, (prefix, uri) in ET.iterparse(target, events=['start-ns'])
    }
    for prefix, uri in declared.items():
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Prefixes shaped like "ns0" are reserved by ElementTree; skip
            # them and let the serializer choose a prefix.
            continue
    root = tree.getroot()
    # A qualified tag looks like "{uri}svg"; a tag without braces has no
    # namespace and must not be mistaken for one.
    root_tag = root.tag
    namespace = root_tag[1:].partition('}')[0] if root_tag.startswith('{') else ""
    image_tag = f"{{{namespace}}}image" if namespace else "image"
    base_dir = os.path.dirname(os.path.abspath(target))
    changed = False
    for elm in root.iter(image_tag):
        # Snapshot the attributes because matching ones are rewritten below.
        for key, attr in list(elm.attrib.items()):
            # Strip a "{uri}" qualifier so href and xlink:href both match.
            if key.rpartition('}')[2] != "href":
                continue
            # Relative references are relative to the document, not the
            # current working directory; fall back to the raw value.
            local = os.path.join(base_dir, attr)
            image_file = local if os.path.isfile(local) else attr
            if not os.path.isfile(image_file):
                continue
            mime_type, _ = mimetypes.guess_type(image_file)
            if not supported_image(mime_type):
                continue
            with open(image_file, mode="rb") as f:
                encoded = base64.b64encode(f.read()).decode('ascii')
            elm.set(key, f"data:{mime_type};base64,{encoded}")
            changed = True
    if changed:
        # The serialized text declares UTF-8, so write it as UTF-8 instead
        # of the platform's locale encoding.
        with open(output, mode="w", encoding="utf-8") as f:
            f.write(ET.tostring(root, encoding='utf-8', method='xml').decode('utf-8'))
    return changed
def merge(target):
    """Merge the images referenced by *target* into a ``.merged`` copy.

    The output path comes from output_filename(). HTML files are handled by
    mergeHtml() and SVG files by mergeSvg(); other file types are ignored.
    A confirmation is printed when an output file was written.

    Args:
        target: Path of the HTML or SVG file to process.

    Raises:
        SystemExit: With status 1 when the output file already exists, so
            that an existing file is never overwritten.
    """
    output = output_filename(target)
    if os.path.isfile(output):
        print(f"Error: {output} is exists")
        # sys.exit is used because the bare exit() helper is provided by the
        # site module for interactive use; a non-zero status marks failure.
        sys.exit(1)
    if isHtml(target):
        res = mergeHtml(target, output)
    elif isSvg(target):
        res = mergeSvg(target, output)
    else:
        res = False
    if res:
        print(f"saved {output}")
def main():
    """Command-line entry point: validate the argument and merge the file.

    Prints the usage message and exits with status 1 when check_arg() does
    not return a file path.
    """
    target = check_arg()
    if not target:
        usage()
        # sys.exit is used because the bare exit() helper is provided by the
        # site module for interactive use; a non-zero status marks the
        # usage error.
        sys.exit(1)
    merge(target)
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Register as a new user and use Qiita more conveniently
- You get articles that match your needs
- You can efficiently read back useful information
- You can use dark theme