0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

VueでTesseract.jsとOpenAI APIを使用してPDFと画像を文字起こし

Posted at

サンプルコード

App.vue

<template>
  <div id="app" class="container">
    <!-- OCR demo UI: pick an image or PDF, choose a PDF page, run OCR, show results. -->
    <h1>OCR Web App with Tesseract.js and ChatGPT</h1>
    <input type="file" @change="onFileChange" accept="image/*,.pdf" />
    <!--
      Page selector, shown once a file is chosen and enabled only for PDFs.
      The .number modifier keeps pagenum numeric regardless of Vue version.
    -->
    <input
      v-if="file"
      type="number"
      v-model.number="pagenum"
      min="1"
      max="100"
      class="input-large border border-blue-400 bg-emerald-200"
      @change="onInputPage"
      :disabled="!isPdf"
    />
    <!-- Disabled while a run is in progress so a double click cannot start concurrent OCR requests. -->
    <button 
      class="bg-emerald-200 border border-blue-400 rounded-full"
      @click="processOCR" 
      :disabled="!file || isProcessing"
    >
      Process OCR
    </button>
    <p v-if="isProcessing">Processing...</p>
    <!-- Raw text returned by Tesseract.js. -->
    <div v-if="ocrText" class="ocr-result">
      <h3>Extracted Text:</h3>
      <pre>{{ ocrText }}</pre>
    </div>
    <!-- Text returned by the ChatGPT request. -->
    <div v-if="processedText" class="ocr-result">
      <h3>Processed Text (ChatGPT):</h3>
      <pre>{{ processedText }}</pre>
    </div>
    <!-- Preview of the uploaded image, or of the rendered PDF page. -->
    <img v-if="imageSrc" :src="imageSrc" alt="Uploaded Image" class="image-preview" />
  </div>
</template>

<script>
import Tesseract from "tesseract.js";
import { getDocument, GlobalWorkerOptions } from "pdfjs-dist";
import axios from "axios";

// Tell PDF.js where to load its worker script from (cdnjs CDN).
// NOTE(review): the pinned version (2.15.349) presumably has to match the
// installed pdfjs-dist version — confirm against package.json.
GlobalWorkerOptions.workerSrc =
  "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.15.349/pdf.worker.min.js";

/**
 * Coerce a user-supplied page number into a valid 1-based page index.
 *
 * @param {unknown} requested - Value from the page input (number, string, empty, ...).
 * @param {number} pageCount - Number of pages in the document.
 * @returns {number} An integer in [1, max(pageCount, 1)]; 1 when `requested`
 *   is empty, non-numeric, non-finite or below 1.
 */
function clampPageNumber(requested, pageCount) {
  const parsed = Math.trunc(Number(requested));
  if (!Number.isFinite(parsed) || parsed < 1) {
    return 1;
  }
  return Math.min(parsed, Math.max(pageCount, 1));
}

/**
 * Revoke `url` if it is a blob: object URL; data URLs and null are ignored.
 *
 * @param {string|null} url - A previously displayed image source.
 */
function releaseObjectUrl(url) {
  if (typeof url === "string" && url.startsWith("blob:")) {
    URL.revokeObjectURL(url);
  }
}

/**
 * OCR demo component: previews an uploaded image or one PDF page, runs
 * Tesseract.js on it, then sends the extracted text to the OpenAI chat API.
 */
export default {
  data() {
    return {
      file: null, // File chosen in the file input
      imageSrc: null, // Preview source: blob URL (image) or PNG data URL (PDF page)
      ocrText: "", // Text extracted by Tesseract.js, or an OCR error message
      processedText: "", // Text returned by ChatGPT, or a ChatGPT error message
      isProcessing: false, // True while processOCR is running
      pagenum: 1, // 1-based PDF page to render
      isPdf: false, // True when the chosen file is a PDF
    };
  },
  methods: {
    /**
     * Store the chosen file and build its preview.
     *
     * For a PDF, renders page `pagenum` (clamped to the document's page range)
     * to a canvas and stores it as a PNG data URL. For any other file, stores
     * an object URL of the file itself.
     *
     * @param {Event|null} event - The input's change event, or null to
     *   re-render the preview of the file already stored on the component.
     */
    async onFileChange(event = null) {
      const file = event ? event.target.files[0] : this.file;

      // The previous preview is about to be replaced; free its blob URL.
      releaseObjectUrl(this.imageSrc);

      if (!file) {
        this.file = null;
        this.imageSrc = null;
        this.isPdf = false;
        return;
      }

      this.file = file;
      this.isPdf = file.type === "application/pdf";

      if (!this.isPdf) {
        this.imageSrc = URL.createObjectURL(file);
        return;
      }

      const pdfUrl = URL.createObjectURL(file);
      const loadingTask = getDocument(pdfUrl);
      try {
        const pdf = await loadingTask.promise;

        // An empty or out-of-range page number would make getPage reject.
        const pageNumber = clampPageNumber(this.pagenum, pdf.numPages);
        this.pagenum = pageNumber;

        const page = await pdf.getPage(pageNumber);
        const viewport = page.getViewport({ scale: 2 });
        const canvas = document.createElement("canvas");
        const context = canvas.getContext("2d");

        canvas.width = viewport.width;
        canvas.height = viewport.height;

        await page.render({ canvasContext: context, viewport }).promise;

        this.imageSrc = canvas.toDataURL("image/png");
      } catch (error) {
        console.error("Failed to render PDF page:", error);
        this.imageSrc = null;
      } finally {
        // The rendered page now lives in the data URL; release the source
        // blob URL and the PDF.js document resources.
        URL.revokeObjectURL(pdfUrl);
        await loadingTask.destroy();
      }
    },

    /** Re-render the preview after the page number input changes. */
    async onInputPage() {
      if (this.file) {
        await this.onFileChange();
      } else {
        console.warn("No file selected.");
      }
    },

    /**
     * Run OCR on the current preview/file, then ask ChatGPT to process the
     * extracted text. OCR and ChatGPT failures are reported separately so a
     * ChatGPT failure does not discard successfully extracted text.
     */
    async processOCR() {
      // Ignore clicks while a run is in flight to avoid overlapping requests.
      if (!this.file || this.isProcessing) return;

      this.isProcessing = true;
      this.ocrText = "";
      this.processedText = "";

      try {
        let text;
        try {
          let input;
          if (this.isPdf) {
            if (!this.imageSrc) {
              throw new Error("PDF page preview is not available.");
            }
            // Convert the rendered page (data URL) into a Blob for Tesseract.
            const response = await fetch(this.imageSrc);
            input = await response.blob();
          } else {
            input = this.file;
          }

          // Run OCR with Tesseract.js (Japanese language data).
          const result = await Tesseract.recognize(input, "jpn", {
            logger: (info) => console.log(info),
          });
          text = result.data.text;
        } catch (error) {
          console.error("Error during processing:", error);
          this.ocrText = "An error occurred during OCR processing.";
          return;
        }

        this.ocrText = text;

        // Nothing was recognized, so there is nothing to send to ChatGPT.
        if (text.trim() === "") return;

        try {
          // NOTE(security): VITE_-prefixed variables are embedded in the client
          // bundle, so this key is visible to anyone who loads the page. For
          // anything beyond local experiments, proxy the request via a server.
          const apiKey = import.meta.env.VITE_API_OPEN_AI;
          if (!apiKey) {
            throw new Error("VITE_API_OPEN_AI is not set.");
          }

          // Call the ChatGPT API to process the extracted text.
          const chatResponse = await axios.post(
            "https://api.openai.com/v1/chat/completions",
            {
              model: "gpt-4", // or gpt-3.5-turbo
              messages: [
                { role: "system", content: "あなたはプロの日本語校正者です。" },
                { role: "user", content: text },
              ],
            },
            {
              headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${apiKey}`,
              },
            }
          );

          this.processedText = chatResponse.data.choices[0].message.content;
        } catch (error) {
          console.error("Error during processing:", error);
          this.processedText = "An error occurred during ChatGPT processing.";
        }
      } finally {
        this.isProcessing = false;
      }
    },
  },
};
</script>


<style>
/* Large, prominent number input used for the PDF page selector. */
.input-large {
  /* Box size and spacing */
  width: 150px;
  height: 50px;
  padding: 10px;
  margin: 10px 0; /* vertical spacing only */
  /* Text */
  font-size: 1.5rem;
  text-align: center;
  /* Decoration */
  border-radius: 10px;
  box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); /* subtle drop shadow */
  cursor: pointer;
}

/* Replace the default focus outline with a green border and glow. */
.input-large:focus {
  border-color: #4caf50;
  box-shadow: 0 0 10px rgba(76, 175, 80, 0.5);
  outline: none;
}

/* Centered page wrapper. */
.container {
  max-width: 600px;
  margin: 20px auto;
  text-align: center;
}

button {
  font-size: 16px;
  padding: 10px 20px;
  margin-top: 10px;
  cursor: pointer;
}

/* Result text: keep line breaks but wrap long lines. */
pre {
  color: #333;
  background: #f4f4f4;
  padding: 10px;
  border-radius: 5px;
  text-align: left;
  white-space: pre-wrap;
  word-wrap: break-word;
}

/* Preview of the uploaded image / rendered PDF page. */
.image-preview {
  max-width: 100%;
  margin-top: 20px;
  border-radius: 5px;
  border: 1px solid #ddd;
}

/* Panel wrapping each block of extracted or processed text. */
.ocr-result {
  background: #e9f7ef;
  border: 1px solid #c3e6cb;
  border-radius: 5px;
  padding: 15px;
  margin-top: 20px;
  text-align: left;
}
</style>

OpenAIのAPIキーの設定場所

        // ChatGPT APIを呼び出して抽出テキストを整形
        const apiKey = import.meta.env.VITE_API_OPEN_AI;

`import.meta.env.VITE_API_OPEN_AI` は、.env ファイルに定義した API キーを読み込むための記述です。なお、`VITE_` で始まる環境変数はビルド時にクライアント側のコードへ埋め込まれるため、公開環境では API キーが第三者から参照できてしまう点に注意してください。

.envをVueで使う手順

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?