LoginSignup
7
17

Llama2とlangchainでpdf読み込んでchatbotの例

Posted at

好きなモデルとPDFを入れてください。質問すればチャットボットが答えます。
私は下記のモデルをダウンロードしました。
https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q8_0.bin

python.py
import os
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
import logging
import sys

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from pypdf import PdfReader

# Load the PDF file
reader = PdfReader(r"読み込みたいpdfのパス")

# Extract the text of every page and join it into one string.
# `or ""` guards against pages for which no text could be extracted.
text = "\n".join(page.extract_text() or "" for page in reader.pages)
print(text)

# Configure logging: send DEBUG-and-above records to stdout. force=True
# replaces any handlers already attached to the root logger.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

# Split the extracted text into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,  # maximum number of characters per chunk
    chunk_overlap=20,  # maximum number of overlapping characters
)
texts = text_splitter.split_text(text)

# Inspect the chunks: total count, then a 10-character preview and the
# length of each one. A dedicated loop variable is used so that `text`
# (the full document) is not overwritten by the last chunk.
print(len(texts))
for chunk in texts:
    print(chunk[:10].replace("\n", "\\n"), ":", len(chunk))

# Build the vector index: embed every chunk with the multilingual E5 model,
# store the vectors in FAISS, then persist the index to the "storage" directory.
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
index = FAISS.from_texts(texts=texts, embedding=embedding_model)
index.save_local("storage")

import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

from langchain.llms import LlamaCpp
# Settings for the local Llama 2 model, gathered in one mapping and
# forwarded to LlamaCpp as keyword arguments.
llama_settings = {
    "model_path": r"Llama2のモデルを保存したパス",
    "n_ctx": 4096,
    "temperature": 0,
    "max_tokens": 640,
    "verbose": True,
    "streaming": True,
}
llm = LlamaCpp(**llama_settings)

# Conversational retrieval chain: the retriever is asked for 4 results
# (k=4) per query, and the chain is built with the "stuff" chain type.
retriever = index.as_retriever(search_kwargs={"k": 4})
qa = ConversationalRetrievalChain.from_llm(
    llm,
    chain_type="stuff",
    retriever=retriever,
)

# Chatbot loop: read a question, answer it from the indexed PDF, and keep
# the (question, answer) pairs so follow-up questions have context.
chat_history = []
print("Welcome to the State of the Union chatbot! Type 'exit' to stop.")
while True:
    query = input("Please enter your question: ").strip()

    if query.lower() == 'exit':
        break
    if not query:
        # Nothing was typed; prompt again instead of sending an empty question.
        continue
    result = qa({"question": query, "chat_history": chat_history})

    print("Answer:", result['answer'])
    # Record this turn; without it the chain always sees an empty history.
    chat_history.append((query, result['answer']))
7
17
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
7
17