More than 1 year has passed since last update.

【GAS×ChatGPT】PDFや画像からテキストを自動で抽出し、要約する

Last updated at 2023-06-06 / Posted at 2023-06-06

はじめに

本記事ではGoogle Apps Script (GAS) を使ってGoogle Cloud VisionとOpenAI GPT-3.5を連携させる方法について解説します。これらのAPIを連携させることで、画像内のテキストを抽出し、そのテキストを自動で要約するというプログラムを作成します。

必要なもの

Google Cloud VisionのAPIキー
OpenAI GPT-3.5のAPIキー

コードの解説

今回のコードは、以下の2つの部分から成り立っています。

Google Apps Script(GAS)
HTMLとJavaScriptのフロントエンドコード

Google Apps Script(GAS)

Google Apps Scriptの部分には、WebアプリのエントリーポイントであるdoGet()に加えて、2つの主要な関数があります。

processImage(base64Image, fileType): Google Cloud Vision APIを使って画像内のテキストを抽出します。
callChatGPT(inputText, targetLength): OpenAI GPT-3.5を使用して、抽出したテキストを要約します。

GAS Code

code.js

const API_KEY = 'YOUR_CLOUD_VISION_API_KEY'; // Set your Cloud Vision API key here
const GPT3_API_KEY = 'YOUR_GPT3.5_API_KEY'; // Set your GPT-3.5 (OpenAI) API key here
/**
 * Serves the front-end page.
 *
 * Builds the HTML output from the project file 'index.html' and sets its
 * X-Frame-Options mode to ALLOWALL before returning it.
 *
 * @returns {*} The HTML output object produced by HtmlService.
 */
function doGet() {
  const page = HtmlService.createHtmlOutputFromFile('index.html');
  const frameMode = HtmlService.XFrameOptionsMode.ALLOWALL;
  return page.setXFrameOptionsMode(frameMode);
}

/**
 * Extracts the text contained in an image using the Cloud Vision API
 * (`images:annotate` with the TEXT_DETECTION feature).
 *
 * @param {string} base64Image - Image bytes encoded as base64 (sent as `image.content`).
 * @param {string} fileType - MIME type supplied by the caller. Not used by this
 *     function (only the image content is sent); kept so existing callers keep working.
 * @returns {string} The detected full text; 'No text found' when the response
 *     carries no full-text annotation; or a string starting with
 *     'Error occurred: ' when the request fails or the API reports an error.
 */
function processImage(base64Image, fileType) {
  try {
    const url = 'https://vision.googleapis.com/v1/images:annotate?key=' + API_KEY;
    const body = {
      requests: [{
        image: {
          content: base64Image
        },
        features: [{
          type: 'TEXT_DETECTION'
        }]
      }]
    };

    const options = {
      method: 'post',
      contentType: 'application/json',
      payload: JSON.stringify(body)
    };

    const response = UrlFetchApp.fetch(url, options);
    const result = JSON.parse(response.getContentText());
    const annotation = result.responses && result.responses[0];

    // A per-image failure is reported inside the response entry rather than as
    // a thrown exception; surface it instead of pretending the image had no text.
    if (annotation && annotation.error) {
      console.error(annotation.error);
      return 'Error occurred: ' + (annotation.error.message || JSON.stringify(annotation.error));
    }

    if (annotation && annotation.fullTextAnnotation) {
      return annotation.fullTextAnnotation.text;
    }
    return 'No text found';
  } catch (e) {
    // Covers fetch failures and unparsable response bodies.
    console.error(e);
    return 'Error occurred: ' + e.toString();
  }
}

/**
 * Summarizes text in Japanese with the OpenAI Chat Completions API
 * (model 'gpt-3.5-turbo').
 *
 * @param {string} inputText - Text to summarize; embedded in the user prompt.
 * @param {number|string} targetLength - Maximum number of characters requested
 *     for the summary. Embedded in the prompt as given; its numeric value plus
 *     50 is sent as `max_tokens`.
 * @returns {string} The summary returned by the model; 'No summary found' when
 *     the response contains no usable choice (for example an API error payload);
 *     or a string starting with 'Error occurred: ' when the request or the
 *     parsing of its response throws.
 */
function callChatGPT(inputText, targetLength) {
  const url = 'https://api.openai.com/v1/chat/completions';
  const messages = [{'role': 'system', 'content': 'あなたはプロの編集者です。'},];
  messages.push({'role': 'user', 'content':
    '`以下の制約条件に従って、入力する文章を要約してください。\n#制約条件\n・重要なキーワードを取りこぼさない。\n・文章の意味を変更しない。\n・架空の表現や言葉を使用しない。\n・入力する文章を句読点を含めて'+targetLength+'文字以内にまとめて出力。\n・文章中の数値には変更を加えない。\n・要約文は日本語で出力する。\n・元の文章の文字数と要約後の文字数は出力しない。\n・要約文のみ出力する。\n#入力する文章\n'+inputText+'\n`'
  });

  // Coerce before adding: with a string argument, `targetLength + 50` would
  // concatenate ("30" -> "3050") instead of adding.
  const numericLength = Number(targetLength);

  const requestBody = {
    'model': 'gpt-3.5-turbo',
    'temperature': 0.7,
    // `undefined` is dropped by JSON.stringify, so a non-numeric length simply
    // omits the field instead of sending an invalid value.
    'max_tokens': Number.isFinite(numericLength) ? Math.round(numericLength) + 50 : undefined,
    'messages': messages,
  };
  const request = {
    method: 'POST',
    muteHttpExceptions: true,
    headers: {
      'Content-Type': 'application/json',
      'Authorization': 'Bearer ' + GPT3_API_KEY,
    },
    payload: JSON.stringify(requestBody),
  };

  try {
    const response = UrlFetchApp.fetch(url, request);
    const result = JSON.parse(response.getContentText());
    const choice = result.choices && result.choices[0];

    if (choice && choice.message) {
      return choice.message.content;
    }

    // With muteHttpExceptions the error arrives as a normal response body;
    // log it so the cause is not lost behind the generic message.
    if (result.error) {
      console.error(result.error);
    }
    return 'No summary found';
  } catch (e) {
    // Same reporting convention as processImage: return the failure as text
    // rather than letting the exception escape the server function.
    console.error(e);
    return 'Error occurred: ' + e.toString();
  }
}

HTMLのフロントエンドコード

HTMLの部分は、次のように動作します。

ユーザーがファイルを選択する機能を提供します。
ファイル（上限は15MB）を選択して「送信」ボタンを押すと、PDFの場合はブラウザ上で1ページずつPNG画像に変換されたうえで、画像データがGoogle Apps Scriptに送られ、テキスト抽出が行われます（画像ファイルの場合はそのまま送られます）。
抽出されたテキストは次に要約され、要約結果がユーザーに表示されます。要約文は1ページ毎に元の文字数のおおよそ3割程度で作成されます。「No summary found」と表示された場合、例えば1ページあたりに含まれる文字数がChatGPT APIのトークン上限を超えていることが考えられます。

HTML Code

index.html

<!DOCTYPE html>
<html>
  <head>
    <script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
    <base target="_top">
  </head>
  <body class="container mt-5">
    <!-- File picker: accepts PDF, PNG and JPEG files -->
    <div class="row">
      <div class="col-6">
        <div class="custom-file mt-4">
          <input type="file" class="custom-file-input" id="pdfFile" accept="application/pdf, image/png, image/jpeg">
          <label class="custom-file-label" for="pdfFile">ファイルを選択</label>
        </div>
      </div>
    </div>
    <!-- Starts processing of the selected file -->
    <button id="submit" class="btn btn-primary mt-4">送信</button>
    <!-- One div per summary is appended here by the script below -->
    <div id="results" class="mt-5"></div>
    <script>
      // File currently selected by the user. Read by checkFile(), processFile()
      // and the submit click handler.
      let file;

      // One change listener does both jobs that were previously split across two
      // listeners on the same input: remember the selection and mirror its name
      // in the label that follows the input.
      const fileInput = document.getElementById('pdfFile');
      const fileLabel = fileInput.nextElementSibling;
      const defaultLabelText = fileLabel.innerText;

      fileInput.addEventListener('change', function(e) {
        // The file list can be empty (e.g. the picker was dismissed and the
        // browser cleared the selection); files[0] is then undefined, so do not
        // dereference it unconditionally.
        file = e.target.files[0];
        fileLabel.innerText = file ? file.name : defaultLabelText;
      });

      // PDF.js initial setup: location of the worker script.
      pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://mozilla.github.io/pdf.js/build/pdf.worker.js';

      /**
       * Checks that the currently selected file (script-level `file`) does not
       * exceed the 15MB size limit.
       *
       * @returns {boolean} true when the file size is within the limit; false
       *     (after logging an error to the console) when it is larger.
       */
      function checkFile() {
        const maxFileSizeMb = 15;
        const maxFileSize = maxFileSizeMb * 1024 * 1024;
        if (file.size > maxFileSize) {
          // The message is built from the limit so the two cannot disagree
          // (it used to say 10MB while the check allowed 15MB).
          console.error('The file is too large. Please select a file smaller than ' + maxFileSizeMb + 'MB.');
          return false;
        }
        return true;
      }


      /**
       * Calls a server-side function through google.script.run and exposes the
       * outcome as a Promise, so that a server failure rejects instead of
       * leaving the caller waiting forever.
       *
       * @param {string} functionName - Name of the server function to call.
       * @param {...*} args - Arguments forwarded to the server function.
       * @returns {Promise<*>} Resolves with the server function's return value.
       */
      function runServerFunction(functionName, ...args) {
        return new Promise(function(resolve, reject) {
          const runner = google.script.run
            .withSuccessHandler(resolve)
            .withFailureHandler(reject);
          runner[functionName](...args);
        });
      }

      /**
       * Sends one base64 image to processImage, then asks callChatGPT for a
       * summary whose target length is 30% of the extracted text length.
       *
       * @param {string} base64Image - Image data without the data-URL prefix.
       * @param {string} fileType - MIME type forwarded to processImage.
       * @returns {Promise<string>} Resolves with the value returned by callChatGPT.
       */
      async function summarizeImage(base64Image, fileType) {
        const ocrText = await runServerFunction('processImage', base64Image, fileType);
        return runServerFunction('callChatGPT', ocrText, Math.round(ocrText.length * 0.3));
      }

      /**
       * Appends one text entry to the #results container.
       *
       * @param {string} text - Text shown in the new div.
       */
      function appendResult(text) {
        const textDiv = document.createElement('div');
        textDiv.innerText = text;
        document.getElementById('results').appendChild(textDiv);
      }

      /**
       * Renders one PDF page onto an off-screen canvas at scale 1.5 and returns
       * it as base64-encoded PNG data.
       *
       * @param {*} pdf - Document object resolved by pdfjsLib.getDocument().promise.
       * @param {number} pageNum - 1-based page number.
       * @returns {Promise<string>} Base64 PNG data without the data-URL prefix.
       */
      async function renderPageAsBase64Png(pdf, pageNum) {
        const page = await pdf.getPage(pageNum);

        // Drawing surface sized to the scaled page.
        const scale = 1.5;
        const viewport = page.getViewport({scale: scale});
        const canvas = document.createElement('canvas');
        const context = canvas.getContext('2d');
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        await page.render({
          canvasContext: context,
          viewport: viewport
        }).promise;

        return canvas.toDataURL('image/png').split(",")[1];
      }

      /**
       * Processes the content of the selected file (script-level `file`).
       *
       * For a PDF, every page is rendered to PNG, OCR'd and summarized one after
       * another, and each result is appended as 'Page N:\n<summary>'. For a PNG
       * or JPEG, the image itself is OCR'd and summarized. Failures are logged
       * and shown in the results area instead of silently stalling.
       *
       * @param {ArrayBuffer|string} result - FileReader result: an ArrayBuffer
       *     for PDFs, a data URL string for images.
       * @returns {Promise<void>} Settles when processing has finished; existing
       *     callers may ignore it.
       */
      function processFile(result) {
        if (file.type === 'application/pdf') {
          const typedarray = new Uint8Array(result);
          return pdfjsLib.getDocument(typedarray).promise.then(async function(pdf) {
            // Pages are handled sequentially so results appear in page order.
            for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
              const heading = 'Page ' + pageNum + ':\n';
              try {
                const base64Image = await renderPageAsBase64Png(pdf, pageNum);
                const summary = await summarizeImage(base64Image, 'image/png');
                appendResult(heading + summary);
              } catch (err) {
                // Report the failing page and carry on with the next one.
                console.error(err);
                appendResult(heading + 'Error occurred: ' + err);
              }
            }
          }).catch(function(err) {
            // The PDF itself could not be loaded.
            console.error(err);
            appendResult('Error occurred: ' + err);
          });
        }

        if (file.type === 'image/png' || file.type === 'image/jpeg') {
          const base64Image = result.split(",")[1];
          return summarizeImage(base64Image, file.type)
            .then(appendResult)
            .catch(function(err) {
              console.error(err);
              appendResult('Error occurred: ' + err);
            });
        }

        return Promise.resolve();
      }


      // Submit button: validate the selected file, clear earlier output, then
      // read the file in the form processFile() expects for its type
      // (ArrayBuffer for PDF, data URL for PNG/JPEG).
      document.getElementById('submit').addEventListener('click', () => {
        if (!file) return;
        if (!checkFile()) return;

        // Remove results from any previous run.
        const resultsContainer = document.getElementById('results');
        resultsContainer.innerHTML = '';

        const reader = new FileReader();
        reader.onload = () => {
          processFile(reader.result);
        };

        switch (file.type) {
          case 'application/pdf':
            reader.readAsArrayBuffer(file);
            break;
          case 'image/png':
          case 'image/jpeg':
            reader.readAsDataURL(file);
            break;
          default:
            // Other types are not read.
            break;
        }
      });
    </script>
  </body>
</html>

まとめ

この記事ではGoogle Apps Scriptを使ったGoogle Cloud VisionとOpenAI GPT-3.5の連携について紹介しました。これを基に自分だけのユニークなアプリケーションを作ってみてください。プログラミングは難しく思えるかもしれませんが、一歩一歩進めば必ずできるようになります。今後も挑戦し続けてください！

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up