
# =====================================
# Load environment variables and define constants
# =====================================
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Variable definitions
MODEL_NAME: str = 'gemini-2.0-flash-lite'
document_path: str = './document.txt'
google_api_key = os.getenv("GOOGLE_API_KEY")  # Get the API key from the environment
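
For reference, the .env file read by load_dotenv() only needs to hold the API key; the value below is a placeholder, not a real key:

# Example contents of .env (placeholder value):
# GOOGLE_API_KEY=your-google-api-key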



# =====================================
# Load the document
# =====================================
from langchain_community.document_loaders import TextLoader

loader = TextLoader(document_path, encoding="utf-8")
documents = loader.load()


# =====================================
# Load documents (when reading multiple files)
# =====================================
# Define the document paths as a list
document_paths = ['./document1.txt', './document2.txt', './document3.txt']

# Load each document and combine the results
documents = []
for path in document_paths:
    loader = TextLoader(path, encoding="utf-8")
    documents.extend(loader.load())  # append the Document objects from each file
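
If all of the files sit in a single folder, DirectoryLoader from langchain_community can achieve the same result in one call. A minimal sketch, assuming the text files live under a ./docs folder (the path and glob pattern are assumptions, not part of the original setup):

# =====================================
# Alternative: load every .txt file in one folder (sketch, assumes ./docs exists)
# =====================================
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(
    "./docs",                             # folder holding the text files (assumed path)
    glob="*.txt",                         # only pick up .txt files
    loader_cls=TextLoader,                # reuse TextLoader for each file
    loader_kwargs={"encoding": "utf-8"},  # same encoding as above
)
documents = loader.load()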
    

# =====================================
# Split the documents into chunks
# =====================================
from langchain_text_splitters import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
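
Note that CharacterTextSplitter splits on the "\n\n" separator by default, so a single chunk can end up longer than chunk_size if one paragraph exceeds it. A quick check of the result:

# Quick sanity check on the split
print(f"Number of chunks: {len(docs)}")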




# =====================================
# Create embeddings (HuggingFace)
# =====================================
from langchain_community.embeddings import HuggingFaceEmbeddings

# Use a sentence-transformers model with Japanese support ('intfloat/multilingual-e5-large'); MIT licensed
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")



# =====================================
# Create embeddings (Gemini)
# =====================================
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")




# =====================================
# Create and save the FAISS vector store
# =====================================
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from the split documents and their embeddings
vectorstore = FAISS.from_documents(docs, embeddings)
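
The heading above mentions saving as well; a minimal sketch for persisting the index to disk and loading it back later (the directory name "faiss_index" is arbitrary, and allow_dangerous_deserialization is required on recent langchain_community versions because the store is pickled):

# Save the vector store to a local directory
vectorstore.save_local("faiss_index")

# Load it back later; the same embeddings object must be passed in
vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,  # loading uses pickle, so this opt-in flag is needed
)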




# =====================================
# Prepare the LLM
# =====================================
from langchain_google_genai import ChatGoogleGenerativeAI  # import this as well if not already imported


# GOOGLE_API_KEY can be passed explicitly, or it is picked up automatically when set in os.environ.
# Here it is passed explicitly.
llm = ChatGoogleGenerativeAI(model=MODEL_NAME, temperature=0, google_api_key=google_api_key)
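
Optionally, you can confirm the model responds before building the chain; invoke() returns a message object whose text is in .content:

# Optional smoke test for the LLM connection (uncomment to run; it makes one API call)
# response = llm.invoke("Hello")
# print(response.content)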

# =====================================
# Build the RAG chain
# =====================================
from langchain.chains import RetrievalQA  # required to use RetrievalQA


qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)

# To control how many of the top vector-search hits are retrieved, configure the retriever as shown below
# (too large a value pulls in irrelevant text and may hit the prompt's token limit).
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True
)



# =====================================
# Generate the question and answer
# =====================================
query = "TMS報告とは何ですか?"  # example query: "What is a TMS report?"
result = qa_chain.invoke({"query": query})


# Display the question
print("\n[Question]\n")
print(query)

# Display the answer
print("\n[Answer]\n")
print(result["result"])

# Display the source documents (.strip() removes leading and trailing whitespace/newlines)
for doc in result["source_documents"]:
    print('\n[Source document]\n', doc.metadata.get('source', 'unknown'))
    print(doc.page_content.strip())





