LoginSignup
0
0

More than 1 year has passed since last update.

[Python][Pillow][pdf2image] PDF を画像ファイルに一括変換(コマンドライン)

Last updated at Posted at 2021-12-21

またまた、コマンドラインにて。
pdf ファイルを画像ファイルに変換。
pdf2image がコマンドラインツールの poppler を叩いてるので、別途、poppler のインストールが必須。
poppler は、私の mac 環境において、brew にてインストールし、パスが通っているが、windows ではパスが通ってないかも?
念のため、スクリプトと同じディレクトリにある poppler/bin を実行中だけ PATH に追加する 2 行を入れてあるが、Windows での動作確認はしていない。
敢えて特筆すべきことと言えば、TIFF を可逆圧縮(deflate)で保存していることくらい。
出力ファイルフォーマットのオプションは必ず、どれかひとつを指定。(デフォルト無し)

pdftoimg.py
import sys
import os
import argparse
import glob
from pathlib import Path
from pdf2image import convert_from_path


def create_parser():
    """Build the command-line parser for the PDF-to-image converter.

    The parser accepts a required ``source`` positional, an optional
    ``destination`` positional (``None`` when omitted), a ``--dpi`` option,
    one boolean flag per output format, and ``--verbose``.

    ``--dpi`` is parsed as an integer so that a non-numeric value is rejected
    by argparse at parse time instead of failing later during conversion.

    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "source",
        type=str,
        help="This is pdf source file or dir. (Specify a file or directory. Wildcards cannot be used.)"
    )
    parser.add_argument(
        "destination",
        type=str,
        nargs="?",
        default=None,
        help="This is destination file or dir."
    )
    parser.add_argument(
        "-d", "--dpi",
        type=int,
        default=200,
        help="Dots per inch, can be seen as the relative resolution of the output PDF, higher is better but anything above 300 is usually not discernable to the naked eye. "
    )

    # (short flag, long flag, label used in the help text); the order here is
    # the order in which the options appear in --help.
    format_flags = (
        ("-j", "--jpeg", "jpeg"),
        ("-p", "--png", "png"),
        ("-t", "--tiff", "tiff"),
        ("-m", "--multipage", "multipage tiff"),
        ("-g", "--gif", "gif"),
        ("-b", "--bmp", "bmp"),
        ("-w", "--webp", "webp"),
    )
    for short_flag, long_flag, label in format_flags:
        parser.add_argument(
            short_flag, long_flag,
            action="store_true",
            help="Output format is {}.".format(label)
        )

    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Give more output."
    )
    return parser


def convert_image(f, dest_dir, fmt, suffix, dpi, multi, verbose):
    """Convert one PDF file into image file(s) written to ``dest_dir``.

    Args:
        f: Path of the source PDF.
        dest_dir: Directory that receives the output file(s).
        fmt: Image format name passed to each page's ``save()`` (e.g. "PNG").
        suffix: File extension, including the dot, appended to output names.
        dpi: Resolution forwarded to ``convert_from_path``.
        multi: When true, write every page into one multipage TIFF named
            ``<pdf name><suffix>``; otherwise write one file per page named
            ``<pdf name>_NNN<suffix>`` with a 1-based, zero-padded index.
        verbose: When true, print ">" per written file and a final status.

    An ``OSError`` raised while converting or saving is reported on stdout and
    not re-raised, so a batch run can continue with the next file.
    """
    # Make a poppler copy placed next to this script discoverable. Add the
    # directory only once so PATH does not grow with every converted file.
    poppler_dir = str(Path(__file__).parent.absolute() / "poppler/bin")
    current_path = os.environ.get("PATH", "")
    if poppler_dir not in current_path.split(os.pathsep):
        if current_path:
            os.environ["PATH"] = current_path + os.pathsep + poppler_dir
        else:
            os.environ["PATH"] = poppler_dir

    name = os.path.splitext(os.path.basename(f))[0]

    try:
        pages = convert_from_path(str(f), dpi)
        if not pages:
            # Guard so that pages[0] below cannot raise IndexError.
            print("Error: {} (no pages)".format(f))
            return

        if multi:
            image_path = os.path.join(dest_dir, name + suffix)
            if verbose:
                print(end=">")
            pages[0].save(
                str(image_path),
                "TIFF",
                compression="tiff_deflate",
                save_all=True,
                append_images=pages[1:],
            )
        else:
            # TIFF output is saved with deflate compression; other formats
            # use the saver's defaults.
            save_options = {"compression": "tiff_deflate"} if fmt == "TIFF" else {}
            for number, page in enumerate(pages, start=1):
                file_name = "{}_{:03d}{}".format(name, number, suffix)
                image_path = os.path.join(dest_dir, file_name)
                if verbose:
                    print(end=">")
                page.save(str(image_path), fmt, **save_options)

        if verbose:
            print(" <Success>", dest_dir)

    except OSError as e:
        # Report which file failed and why, then let the caller carry on.
        print("Error: {} ({})".format(f, e))


def main():
    """Parse the command line and convert the requested PDF(s) to images.

    Exactly one output-format flag must be given; otherwise an error is
    printed and the process exits with status 1. The destination directory
    defaults to the current directory and is created when missing. ``source``
    may be a single PDF file or a directory whose PDF files are all converted.
    The ``.pdf`` extension is matched case-insensitively.
    """
    parser = create_parser()
    args = parser.parse_args()
    verbose = args.verbose
    multipage = args.multipage

    # (flag value, image format name, output suffix); plain and multipage
    # TIFF share the same format name and suffix.
    format_table = [
        (args.jpeg, "JPEG", ".jpg"),
        (args.png, "PNG", ".png"),
        (args.tiff, "TIFF", ".tif"),
        (multipage, "TIFF", ".tif"),
        (args.gif, "GIF", ".gif"),
        (args.bmp, "BMP", ".bmp"),
        (args.webp, "WEBP", ".webp"),
    ]
    selected = [(name, ext) for flag, name, ext in format_table if flag]
    if len(selected) != 1:
        print("Error: Any one of jpeg, png, tiff, multipage tiff, gif, bmp, webp.")
        sys.exit(1)
    fmt, suffix = selected[0]
    if verbose:
        print("Image format:", fmt)

    dest_dir = "." if args.destination is None else args.destination
    try:
        os.makedirs(dest_dir, exist_ok=True)
    except OSError as e:
        # Covers an existing file at the destination (FileExistsError) as well
        # as e.g. a file in the middle of the path or missing permissions.
        print("ERROR: Destination is " + str(e.filename))
        sys.exit(1)

    if os.path.isfile(args.source):
        if os.path.splitext(args.source)[1].lower() != ".pdf":
            print("Error: Source is not a PDF file.")
            sys.exit(1)
        if verbose:
            print(args.source, end=" > ")
        convert_image(args.source, dest_dir, fmt, suffix, args.dpi, multipage, verbose)

    elif os.path.isdir(args.source):
        # Filter by lowercased extension so that "X.PDF" is picked up too, and
        # sort so the processing order is deterministic.
        pdf_files = sorted(
            path for path in glob.glob(args.source + "/*")
            if os.path.isfile(path)
            and os.path.splitext(path)[1].lower() == ".pdf"
        )
        for f in pdf_files:
            if verbose:
                print(f, end=" ")
            convert_image(f, dest_dir, fmt, suffix, args.dpi, multipage, verbose)

    else:
        print("ERROR: Source file or dir does not exist.")
        sys.exit(1)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

github はこちら

参考

https://www.uosansatox.biz/entry/2017/12/07/124831

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0