More than 1 year has passed since last update.

Llama2とlangchainでpdf読み込んでchatbotの例

Posted at 2023-07-22

好きなモデルとPDFを入れてください。質問すればチャットボットが答えます。
私は下記のモデルをダウンロードしました。
https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q8_0.bin

python.py

import os
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
import logging
import sys

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from pypdf import PdfReader

# Load the PDF file.
reader = PdfReader(r"読み込みたいpdfのパス")

# Extract the text of every page and join the pages into a single string.
# The previous code called `page.extract_text()` without ever defining
# `page`, which raised NameError; iterate over `reader.pages` instead.
# The `or ""` guards against a page yielding no text (e.g. image-only pages).
text = "\n".join(page.extract_text() or "" for page in reader.pages)
print(text)

# Configure logging: emit DEBUG-and-above records to stdout. force=True
# replaces any handlers that were already installed on the root logger.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

# Split the extracted text into chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,  # maximum number of characters per chunk
    chunk_overlap=20,  # maximum number of overlapping characters
)
texts = text_splitter.split_text(text)

# Inspect the chunks: print the chunk count, then a 10-character preview
# (newlines shown as a literal "\n") and the length of each chunk.
# The loop variable is named `chunk` so it does not overwrite `text`,
# which holds the full extracted document.
print(len(texts))
for chunk in texts:
    print(chunk[:10].replace("\n", "\\n"), ":", len(chunk))

# Build the index: embed each chunk with the multilingual E5 model, store
# the result in a FAISS vector store, and save it locally under "storage".
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
index = FAISS.from_texts(texts=texts, embedding=embedding_model)
index.save_local("storage")

import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

from langchain.llms import LlamaCpp
# Settings for the local Llama 2 model loaded through LlamaCpp.
llama_options = {
    "model_path": r"Llama2のモデルを保存したパス",
    "n_ctx": 4096,
    "temperature": 0,
    "max_tokens": 640,
    "verbose": True,
    "streaming": True,
}
llm = LlamaCpp(**llama_options)

# Retriever over the index built above, created with search_kwargs={"k": 4}.
retriever = index.as_retriever(search_kwargs={"k": 4})

# Conversational retrieval chain combining the LLM and the retriever.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    chain_type="stuff",
    retriever=retriever,
)

# Chatbot loop: keep answering questions until the user types 'exit'.
# `chat_history` holds (question, answer) pairs and is passed to the chain on
# every call so follow-up questions can refer to earlier turns.
chat_history = []
print("Welcome to the State of the Union chatbot! Type 'exit' to stop.")
while True:
    # Strip surrounding whitespace so " exit " still terminates the loop.
    query = input("Please enter your question: ").strip()

    if query.lower() == 'exit':
        break
    if not query:
        # Blank input: prompt again instead of invoking the model.
        continue
    result = qa({"question": query, "chat_history": chat_history})

    print("Answer:", result['answer'])
    # Record this exchange; previously the history was never updated, so the
    # chain always received an empty conversation.
    chat_history.append((query, result['answer']))

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up