ローカルのsvgとかhtmlが参照している画像を埋め込んで1ファイルにする

Posted at 2024-07-17
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import os
import re
import base64
import mimetypes
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup


# Namespace extracted from the SVG root tag. mergeSvg() assigns it and
# find_all_elements() reads it to build fully qualified "{namespace}tag" names.
namespace = ""

def usage():
    """Print a short usage message that shows this script's file name."""
    script_name = os.path.basename(__file__)
    print(f"""
usage:
    {script_name} [filepath]
""")

def check_file_extension(f, check):
    """Return True when the extension of path *f* equals *check*, ignoring case.

    *check* is given without the leading dot (for example ``"svg"``).
    """
    suffix = os.path.splitext(f)[1]
    expected = "." + check.lower()
    return suffix.lower() == expected

def isSvg(f):
    """Return True when path *f* has an ``.svg`` extension (case-insensitive)."""
    has_svg_suffix = check_file_extension(f, "svg")
    return has_svg_suffix

def isHtml(f):
    """Return True when path *f* ends in ``.html`` or ``.htm`` (case-insensitive)."""
    return any(check_file_extension(f, ext) for ext in ("html", "htm"))

def check_arg():
    """Return the first command-line argument if it names an existing file.

    Returns None when no argument was supplied or when the argument is not
    a regular file.
    """
    args = sys.argv
    if len(args) >= 2 and os.path.isfile(args[1]):
        return args[1]
    return None
    
def output_filename(f):
    """Return the output path for *f*: ``.merged`` inserted before the extension."""
    stem, suffix = os.path.splitext(f)
    return "".join((stem, ".merged", suffix))

def supported_image(mime):
    """Return True when *mime* is one of the raster image types that get embedded."""
    embeddable = ('image/png', 'image/bmp', 'image/gif', 'image/jpeg')
    return mime in embeddable

def find_all_elements(node, tag):
    """Return every element named *tag* in the subtree rooted at *node*.

    The local name *tag* is qualified with the module-level ``namespace``
    as ``{namespace}tag``. When ``namespace`` is empty the bare tag name is
    used, so documents without an XML namespace are matched as well.

    Args:
        node: Element at which the search starts; it is itself a candidate.
        tag: Local tag name to look for (for example ``"image"``).

    Returns:
        A list of matching elements in document order (possibly empty).
    """
    # ElementTree stores un-namespaced tags without braces, so "{}image"
    # would never match anything; fall back to the plain name in that case.
    full_tag = f"{{{namespace}}}{tag}" if namespace else tag

    # Element.iter() walks the subtree (including *node*) in document order,
    # which is the same order the previous hand-written recursion produced.
    return [elm for elm in node.iter() if elm.tag == full_tag]

def get_svg_content(filepath):
    """Return the text of an SVG file starting at its opening ``<svg`` tag.

    Anything before the tag (XML declaration, DOCTYPE, comments) is dropped.

    Args:
        filepath: Path of the SVG file; it is read as UTF-8 text.

    Returns:
        The file content from ``<svg`` onwards, or None when the file
        contains no ``<svg`` tag.
    """
    # Read as UTF-8 explicitly instead of relying on the locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # Accept "<svg>", "<svg/>" and "<svg" followed by any whitespace, not
    # only "<svg " with a literal space; the lookahead keeps names such as
    # "<svgfoo" from matching.
    match = re.search(r"<svg(?=[\s>/])", content)
    if match is None:
        return None
    return content[match.start():]
    
def mergeHtml(target, output):
    """Embed locally referenced images into an HTML file and save the result.

    Each ``<img>`` whose ``src`` names an existing local file is rewritten:
    an SVG file is parsed and inserted inline in place of the ``<img>``
    element, and a PNG/BMP/GIF/JPEG file is replaced by a base64 ``data:``
    URI. Other images are left untouched.

    Args:
        target: Path of the HTML file to read.
        output: Path of the merged HTML file to write (UTF-8).

    Returns:
        True when at least one image was embedded and *output* was written,
        False otherwise (no file is written in that case).
    """
    # Hand BeautifulSoup the raw bytes so it can detect the document
    # encoding itself, and close the handle as soon as parsing is done.
    with open(target, 'rb') as f:
        soup = BeautifulSoup(f, 'html.parser')

    # Relative image paths are relative to the document, not to the
    # current working directory of whoever runs the script.
    base_dir = os.path.dirname(os.path.abspath(target))

    changed = False
    for elm in soup.find_all('img'):
        src = elm.get('src')
        if not src:
            # <img> without a usable src: nothing to embed.
            continue

        # os.path.join keeps an absolute src unchanged.
        image_file = os.path.join(base_dir, src)
        if not os.path.isfile(image_file):
            continue

        mime_type, _ = mimetypes.guess_type(image_file)
        if mime_type == "image/svg+xml":
            with open(image_file, 'rb') as f:
                svg = BeautifulSoup(f, 'html.parser')
            elm.replace_with(svg)
            changed = True
            continue

        if not supported_image(mime_type):
            continue

        with open(image_file, mode="rb") as f:
            encoded = base64.b64encode(f.read()).decode('ascii')
        elm['src'] = f"data:{mime_type};base64,{encoded}"
        changed = True

    if not changed:
        return False

    with open(output, 'w', encoding='utf-8') as f:
        f.write(str(soup))
    return True

def mergeSvg(target, output):
    """Embed locally referenced raster images into an SVG file and save it.

    Every ``image`` element in the root's namespace is inspected; an
    ``href`` attribute (plain or namespaced, e.g. ``xlink:href``) that names
    an existing local PNG/BMP/GIF/JPEG file is replaced by a base64
    ``data:`` URI.

    Side effect: the module-level ``namespace`` is set to the namespace URI
    of the root element ("" when the root has none).

    Args:
        target: Path of the SVG file to read.
        output: Path of the merged SVG file to write (UTF-8 bytes).

    Returns:
        True when at least one image was embedded and *output* was written,
        False otherwise (no file is written in that case).
    """
    global namespace

    tree = ET.parse(target)

    # Re-register the prefixes declared in the document so serialisation
    # keeps them instead of inventing ns0, ns1, ...
    declared = dict(node for _, node in ET.iterparse(target, events=['start-ns']))
    for prefix, uri in declared.items():
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # ElementTree reserves prefixes of the form "ns<digits>" and
            # refuses to register them; the serializer picks its own prefix
            # for such a URI, so skipping is safe.
            continue

    root = tree.getroot()

    # Namespaced tags look like "{uri}local"; a root without xmlns has no
    # braces at all and must yield an empty namespace, not its own tag name.
    if root.tag.startswith('{'):
        namespace = root.tag[1:].partition('}')[0]
    else:
        namespace = ""
    image_tag = f"{{{namespace}}}image" if namespace else "image"

    # Relative hrefs are relative to the SVG file, not to the current
    # working directory.
    base_dir = os.path.dirname(os.path.abspath(target))

    changed = False
    for elm in root.iter(image_tag):
        # Iterate over a snapshot because matching attributes are rewritten.
        for key, attr in list(elm.attrib.items()):
            # Strip a "{namespace}" prefix so both href and xlink:href match.
            if re.sub(r'\{[^}]+\}', '', key) != "href":
                continue

            # os.path.join keeps an absolute href unchanged.
            image_file = os.path.join(base_dir, attr)
            if not os.path.isfile(image_file):
                continue

            mime_type, _ = mimetypes.guess_type(image_file)
            if not supported_image(mime_type):
                continue

            with open(image_file, mode="rb") as f:
                encoded = base64.b64encode(f.read()).decode('ascii')
            elm.set(key, f"data:{mime_type};base64,{encoded}")
            changed = True

    if not changed:
        return False

    # Write the UTF-8 bytes directly; going through a text-mode handle would
    # re-encode them with the locale encoding.
    with open(output, mode="wb") as f:
        f.write(ET.tostring(root, encoding='utf-8', method='xml'))
    return True

def merge(target):
    """Embed the images referenced by *target* into a new ``.merged`` file.

    The output path comes from output_filename(). HTML files are handled by
    mergeHtml() and SVG files by mergeSvg(); any other extension is ignored.
    A "saved ..." line is printed when an output file was written.

    Args:
        target: Path of the HTML or SVG file to process.

    Raises:
        SystemExit: With status 1 when the output file already exists, so
            an earlier result is never overwritten.
    """
    output = output_filename(target)
    if os.path.isfile(output):
        print(f"Error: {output} is exists", file=sys.stderr)
        # sys.exit is always available (the bare exit() helper comes from
        # the site module) and a non-zero status signals the failure.
        sys.exit(1)

    res = False
    if isHtml(target):
        res = mergeHtml(target, output)
    elif isSvg(target):
        res = mergeSvg(target, output)

    if res:
        print(f"saved {output}")
        
    

def main():
    """Command-line entry point: validate the argument and merge the file.

    Prints the usage text and exits with status 1 when no existing file was
    given on the command line.
    """
    target = check_arg()
    if not target:
        usage()
        # Missing or invalid argument is an error, so report it through the
        # exit status; sys.exit does not depend on the site module.
        sys.exit(1)
    merge(target)
    

# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
You get articles that match your needs
You can efficiently read back useful information
You can use dark theme
What you can do with signing up