0
0

ローカルのsvgとかhtmlが参照している画像を埋め込んで1ファイルにする

Posted at
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import os
import re
import base64
import mimetypes
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup


# XML namespace URI used to qualify tag names during an SVG search.
# Assigned by mergeSvg() and read by find_all_elements().
namespace = ""

def usage():
    """Print a short usage message that names this script file."""
    script_name = os.path.basename(__file__)
    message_lines = ["", "usage:", f"    {script_name} [filepath]", ""]
    print("\n".join(message_lines))

def check_file_extension(f, check):
    """Return True when the extension of path *f* equals *check*, ignoring case.

    *check* is given without the leading dot, e.g. ``"svg"``.
    """
    suffix = os.path.splitext(f)[1]
    wanted = "." + check
    return suffix.lower() == wanted.lower()

def isSvg(f):
    """Return True when path *f* has an ``.svg`` extension (case-insensitive)."""
    svg_extension = "svg"
    return check_file_extension(f, svg_extension)

def isHtml(f):
    """Return True when path *f* has an ``.html`` or ``.htm`` extension."""
    for candidate in ("html", "htm"):
        if check_file_extension(f, candidate):
            return True
    return False

def check_arg():
    """Return the first command-line argument when it names an existing file.

    Returns None when no argument was supplied or when the argument is not
    a regular file.
    """
    arguments = sys.argv[1:]
    if arguments and os.path.isfile(arguments[0]):
        return arguments[0]
    return None
    
def output_filename(f):
    """Return path *f* with ``.merged`` inserted in front of its extension."""
    stem, suffix = os.path.splitext(f)
    return "".join((stem, ".merged", suffix))

def supported_image(mime):
    """Return True when *mime* is PNG, BMP, GIF or JPEG."""
    accepted = ('image/png', 'image/bmp', 'image/gif', 'image/jpeg')
    return mime in accepted

def find_all_elements(node, tag):
    """Collect *node* and all of its descendants whose tag is *tag*.

    The tag is qualified with the namespace URI held in the module-level
    ``namespace`` variable. When that variable is empty the bare *tag* is
    matched, so elements of a document without a namespace are found too.

    Args:
        node: ``xml.etree.ElementTree.Element`` at the top of the subtree
            to search; it is included in the result when it matches.
        tag: Local tag name without any ``{namespace}`` prefix.

    Returns:
        A list of the matching elements in document order.
    """
    # ElementTree spells a qualified name as "{uri}local"; an element
    # without a namespace is just "local", which "{}local" never equals.
    full_tag = f"{{{namespace}}}{tag}" if namespace else tag

    # Element.iter() yields the element itself followed by every
    # descendant in document order, which is the order the search needs.
    return [element for element in node.iter() if element.tag == full_tag]

def get_svg_content(filepath):
    """Return the text of *filepath* starting at its first ``<svg `` tag.

    Returns None when the file contains no ``<svg `` substring.
    """
    with open(filepath, "r") as handle:
        text = handle.read()

    start = text.find("<svg ")
    return None if start == -1 else text[start:]
    
def mergeHtml(target, output):
    """Embed the local images referenced by an HTML file and save the result.

    Every ``<img>`` whose ``src`` names an existing local file is handled:
    an SVG file is parsed and inserted in place of the ``<img>`` element,
    and an image accepted by ``supported_image`` has its ``src`` replaced
    by a base64 ``data:`` URI. Other images are left untouched. *output*
    is only written when at least one image was embedded.

    Args:
        target: Path of the HTML file to read.
        output: Path of the merged HTML file to write (UTF-8).

    Returns:
        True when at least one image was embedded and *output* was
        written, otherwise False.
    """
    # A relative src is relative to the document, so look next to the
    # HTML file first; the working directory is kept as a fallback.
    base_dir = os.path.dirname(os.path.abspath(target))

    def locate(reference):
        for candidate in (os.path.join(base_dir, reference), reference):
            if os.path.isfile(candidate):
                return candidate
        return None

    # NOTE(review): the input is decoded with the platform default
    # encoding while the output is always written as UTF-8.
    with open(target) as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')

    changed = False
    for elm in soup.find_all('img'):
        # An <img> without a src attribute has nothing to embed.
        src = elm.get('src')
        if not src:
            continue

        image_file = locate(src)
        if image_file is None:
            continue

        mime_type, _ = mimetypes.guess_type(image_file)
        if mime_type == "image/svg+xml":
            # SVG is markup, so it is inlined instead of base64-encoded.
            with open(image_file) as svg_file:
                svg = BeautifulSoup(svg_file, 'html.parser')
            elm.replace_with(svg)
            changed = True
            continue

        if not supported_image(mime_type):
            continue

        with open(image_file, mode="rb") as f:
            encoded = base64.b64encode(f.read()).decode('ascii')
        elm['src'] = f"data:{mime_type};base64,{encoded}"
        changed = True

    if changed:
        with open(output, 'w', encoding='utf-8') as f:
            f.write(str(soup))
    return changed

def mergeSvg(target, output):
    """Embed the local images referenced by an SVG file and save the result.

    For every ``image`` element returned by ``find_all_elements``, each
    attribute whose local name is ``href`` (with or without a namespace
    prefix) that names an existing local file accepted by
    ``supported_image`` is replaced by a base64 ``data:`` URI. *output*
    is only written when at least one image was embedded.

    Side effect: the module-level ``namespace`` variable is set to the
    namespace URI of the root element, or to ``""`` when the root has none.

    Args:
        target: Path of the SVG file to read.
        output: Path of the merged SVG file to write (UTF-8).

    Returns:
        True when at least one image was embedded and *output* was
        written, otherwise False.
    """
    global namespace

    # A single parsing pass yields the prefix declarations; the finished
    # tree is available as ``root`` once the iterator is exhausted.
    parser = ET.iterparse(target, events=['start-ns'])
    namespaces = {prefix: uri for _, (prefix, uri) in parser}
    root = parser.root

    # Registering the document's own prefixes keeps them in the output.
    for prefix, uri in namespaces.items():
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Reserved prefixes such as "ns0" cannot be registered;
            # ElementTree then picks a prefix itself when serializing.
            continue

    # A qualified tag looks like "{uri}svg"; a root without a namespace
    # is just "svg" and must not be mistaken for a namespace URI.
    if root.tag.startswith('{'):
        namespace = root.tag[1:].split('}')[0]
    else:
        namespace = ""

    # A relative href is relative to the document, so look next to the
    # SVG file first; the working directory is kept as a fallback.
    base_dir = os.path.dirname(os.path.abspath(target))

    def locate(reference):
        for candidate in (os.path.join(base_dir, reference), reference):
            if os.path.isfile(candidate):
                return candidate
        return None

    changed = False
    for elm in find_all_elements(root, 'image'):
        # Iterate over a snapshot because the attributes are updated below.
        for key, attr in list(elm.attrib.items()):
            # Drop the "{uri}" part so href and xlink:href both match.
            if re.sub(r'\{[^}]+\}', '', key) != "href":
                continue

            image_file = locate(attr)
            if image_file is None:
                continue

            mime_type, _ = mimetypes.guess_type(image_file)
            if not supported_image(mime_type):
                continue

            with open(image_file, mode="rb") as f:
                encoded = base64.b64encode(f.read()).decode('ascii')
            elm.set(key, f"data:{mime_type};base64,{encoded}")
            changed = True

    if changed:
        # The serialized text is UTF-8, so the file is opened as UTF-8
        # rather than with the platform default encoding.
        with open(output, mode="w", encoding="utf-8") as f:
            f.write(ET.tostring(root, encoding='utf-8', method='xml').decode('utf-8'))
    return changed

def merge(target):
    """Embed the images of *target* into a new ``.merged`` file beside it.

    The output path comes from ``output_filename``. HTML files are handled
    by ``mergeHtml`` and SVG files by ``mergeSvg``; any other extension is
    ignored. A confirmation line is printed when a file was saved.

    Args:
        target: Path of the HTML or SVG file to process.

    Raises:
        SystemExit: With status 1 when the output file already exists, so
            an existing file is never overwritten.
    """
    output = output_filename(target)
    if os.path.isfile(output):
        print(f"Error: {output} is exists")
        # sys.exit() is always available, unlike the interactive exit()
        # helper installed by the site module; a non-zero status reports
        # the failure to the caller.
        sys.exit(1)

    if isHtml(target):
        res = mergeHtml(target, output)
    elif isSvg(target):
        res = mergeSvg(target, output)
    else:
        res = False

    if res:
        print(f"saved {output}")
        
    

def main():
    """Command-line entry point: check the argument and merge the file.

    Raises:
        SystemExit: With status 1, after printing the usage text, when
            ``check_arg`` returns no usable file path.
    """
    target = check_arg()
    if not target:
        usage()
        # sys.exit() is always available, unlike the interactive exit()
        # helper installed by the site module; a non-zero status marks
        # the invocation as failed.
        sys.exit(1)
    merge(target)
    

# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0