LangChain retrieval agent¶
In this notebook, we are building a LangChain agent to take in user input and figure out the best tool(s) to use via chain of thought (CoT) reasoning.
Because the tools defined for our agent cover more than one distinct task (one is summarization; the other, which generates multiple-choice questions and their corresponding answers, is closer to traditional Natural Language Understanding (NLU)), we will use two key evaluations for our agent: Tool Input and Tool Selection. Both will be defined with custom functions.
# !pip install trulens trulens-providers-openai trulens-apps-langchain langchain==0.0.335 unstructured==0.10.23 chromadb==0.4.14
import os
from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# "sk-..." is a placeholder: substitute a real OpenAI API key before running.
# This assignment overwrites any OPENAI_API_KEY already present in the environment.
os.environ["OPENAI_API_KEY"] = "sk-..."
Define custom class that loads documents into local vector store.¶
We are using Chroma, one of the open-source embedding database offerings, in the following example.
class VectorstoreManager:
    """Manage the Chroma vectorstore used during a single conversation.

    The manager remembers every document chunk added so far, so that each
    call to ``add_documents_to_vectorstore`` can rebuild the store from the
    complete set of chunks.
    """

    def __init__(self):
        """Start with no vectorstore and an empty list of document splits."""
        self.vectorstore = None  # Vectorstore for the current conversation
        self.all_document_splits = []  # All document splits added during a conversation

    def initialize_vectorstore(self):
        """Create an empty vectorstore for a new conversation.

        Also clears the list of previously added document splits.

        Returns:
            The newly created ``Chroma`` instance.
        """
        self.vectorstore = Chroma(
            embedding_function=OpenAIEmbeddings(),
        )
        self.all_document_splits = []  # Reset the documents list for the new conversation
        return self.vectorstore

    def add_documents_to_vectorstore(self, url_lst: list):
        """Load web pages, split them, and rebuild the vectorstore.

        Example assumes loading new documents from websites to the
        vectorstore during a conversation.

        Args:
            url_lst: URLs of the web pages to load.

        Returns:
            A ``Chroma`` instance built from every split accumulated so far,
            including those added by earlier calls.
        """
        for doc_url in url_lst:
            document_splits = self.load_and_split_document(doc_url)
            self.all_document_splits.extend(document_splits)

        # Build the store once, after all URLs are loaded, so it is created
        # from the complete set of accumulated splits.
        self.vectorstore = Chroma.from_documents(
            documents=self.all_document_splits,
            embedding=OpenAIEmbeddings(),
        )
        return self.vectorstore

    def get_vectorstore(self):
        """Return the vectorstore for the current conversation.

        Returns:
            The vectorstore previously created by ``initialize_vectorstore``
            or ``add_documents_to_vectorstore``.

        Raises:
            ValueError: If no vectorstore has been created yet. This method
                does not create one on demand.
        """
        if self.vectorstore is None:
            raise ValueError(
                "Vectorstore is not initialized. Please initialize it first."
            )
        return self.vectorstore

    @staticmethod
    def load_and_split_document(url: str, chunk_size=1000, chunk_overlap=0):
        """Load a web page and split it into chunks.

        Args:
            url: Address of the page to load.
            chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as
                ``chunk_size``.
            chunk_overlap: Passed to ``RecursiveCharacterTextSplitter`` as
                ``chunk_overlap``.

        Returns:
            The splits produced by ``WebBaseLoader.load_and_split``.
        """
        loader = WebBaseLoader(url)
        splits = loader.load_and_split(
            RecursiveCharacterTextSplitter(
                chunk_size=chunk_size, chunk_overlap=chunk_overlap
            )
        )
        return splits
# Web page whose text is loaded, split, and indexed below.
DOC_URL = "http://paulgraham.com/worked.html"
vectorstore_manager = VectorstoreManager()
# Build the Chroma store from DOC_URL; vec_store backs the retrievers created later.
vec_store = vectorstore_manager.add_documents_to_vectorstore([DOC_URL])
Set up conversational agent with multiple tools.¶
The tools are then selected based on the match between their names/descriptions and the user input, for document retrieval, summarization, and generation of question-answering pairs.
# Chat model shared by the memory summarizer, both retrieval chains, and the agent.
llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.0)
# Agent memory, exposed to the prompt under the "chat_history" key as message objects.
# NOTE(review): `k` is normally a ConversationBufferWindowMemory option; confirm it
# has any effect on ConversationSummaryBufferMemory, which is bounded by max_token_limit.
conversational_memory = ConversationSummaryBufferMemory(
k=4,
max_token_limit=64,
llm=llm,
memory_key="chat_history",
return_messages=True,
)
# Prompt for the retrieval/summarization chain. {context} and {question} are the
# two input variables declared on the PromptTemplate below.
retrieval_summarization_template = """
System: Follow these instructions below in all your responses:
System: always try to retrieve documents as knowledge base or external data source from retriever (vector DB).
System: If performing summarization, you will try to be as accurate and informational as possible.
System: If providing a summary/key takeaways/highlights, make sure the output is numbered as bullet points.
If you don't understand the source document or cannot find sufficient relevant context, be sure to ask me for more context information.
{context}
Question: {question}
Action:
"""
# Prompt for the multiple-choice question generation chain; same two input variables.
# NOTE(review): "only correct answer" probably intends "only one correct answer";
# left unchanged because the text is sent to the model verbatim.
question_generation_template = """
System: Based on the summarized context, you are expected to generate a specified number of multiple choice questions and their answers from the context to ensure understanding. Each question, unless specified otherwise, is expected to have 4 options and only correct answer.
System: Questions should be in the format of numbered list.
{context}
Question: {question}
Action:
"""
# Wrap each template string in a PromptTemplate so it can be passed to a chain.
summarization_prompt = PromptTemplate(
template=retrieval_summarization_template,
input_variables=["question", "context"],
)
question_generator_prompt = PromptTemplate(
template=question_generation_template,
input_variables=["question", "context"],
)
# Retrieval QA chains: both use the same LLM and a retriever over vec_store with the
# "stuff" chain type; they differ only in the prompt they are given.
summarization_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=vec_store.as_retriever(),
chain_type_kwargs={"prompt": summarization_prompt},
)
# Despite its name, this chain uses the question *generation* prompt.
question_answering_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=vec_store.as_retriever(),
chain_type_kwargs={"prompt": question_generator_prompt},
)
# Tools offered to the agent; each wraps the `run` method of one retrieval chain.
# NOTE(review): the second tool's description talks about holding a conversation and
# memory, while its name and chain are about generating multiple choice questions;
# confirm the description is intentional, since the agent sees it when picking a tool.
tools = [
Tool(
name="Knowledge Base / retrieval from documents",
func=summarization_chain.run,
description="useful for when you need to answer questions about the source document(s).",
),
Tool(
name="Conversational agent to generate multiple choice questions and their answers about the summary of the source document(s)",
func=question_answering_chain.run,
description="useful for when you need to have a conversation with a human and hold the memory of the current / previous conversation.",
),
]
# Conversational ReAct-style chat agent wired to the tools, LLM, and memory above.
agent = initialize_agent(
agent="chat-conversational-react-description",
tools=tools,
llm=llm,
memory=conversational_memory,
)
Set up Evaluation¶
from trulens.core import TruSession
# Create the TruLens session and call reset_database() on it before recording.
session = TruSession()
session.reset_database()
from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.openai import OpenAI as fOpenAI
class OpenAI_custom(fOpenAI):
    """OpenAI feedback provider extended with two LLM-graded custom metrics.

    Each metric asks the chat model for a rating from 0 to 10 and returns
    that rating rescaled to the 0-1 range.
    """

    def _rate_0_to_1(self, system_prompt: str, user_prompt: str) -> float:
        """Ask the chat model for a 0-10 rating and rescale it to 0-1.

        Args:
            system_prompt: Grading instructions sent as the system message.
            user_prompt: The material to grade, sent as the user message.

        Returns:
            The model's rating divided by 10, clamped to the range [0, 1].

        Raises:
            ValueError: If the model's reply cannot be parsed as a number.
        """
        reply = (
            self.endpoint.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            )
            .choices[0]
            .message.content
        )
        try:
            rating = float(reply)
        except (TypeError, ValueError) as err:
            # The model is only *asked* to reply with a bare number; surface
            # what it actually said instead of an opaque conversion error.
            raise ValueError(
                f"Expected a numeric 0-10 rating from the model, got {reply!r}"
            ) from err
        # Keep the score inside 0-1 even if the model answers outside the
        # requested 0-10 scale.
        return min(max(rating, 0.0), 10.0) / 10

    def query_translation(self, question1: str, question2: str) -> float:
        """Rate how similar two questions are.

        Args:
            question1: First question to compare.
            question2: Second question to compare.

        Returns:
            Similarity in the range 0-1, where 0 means completely distinct
            and 1 means matching exactly.

        Raises:
            ValueError: If the model's reply is not a number.
        """
        return self._rate_0_to_1(
            "Your job is to rate how similar two questions are on a scale of 0 to 10, where 0 is completely distinct and 10 is matching exactly. Respond with the number only.",
            f"QUESTION 1: {question1}; QUESTION 2: {question2}",
        )

    def tool_selection(self, task: str, tool: str) -> float:
        """Rate whether a tool is the right choice for a task.

        Args:
            task: Description of the task to be carried out.
            tool: Name of the tool that was chosen.

        Returns:
            Fit in the range 0-1, where 0 means the wrong tool and 1 means
            the perfect tool.

        Raises:
            ValueError: If the model's reply is not a number.
        """
        return self._rate_0_to_1(
            "Your job is to rate if the TOOL is the right tool for the TASK, where 0 is the wrong tool and 10 is the perfect tool. Respond with the number only.",
            f"TASK: {task}; TOOL: {tool}",
        )
# Provider instance whose custom methods back the two feedback functions below.
custom = OpenAI_custom()
# Query translation feedback (custom) to evaluate the similarity between the user's
# original question and the question generated by the agent after paraphrasing.
# First argument: the `input` kwarg passed to agent.plan; second: the `tool_input`
# attribute of what agent.plan returned.
f_query_translation = (
Feedback(custom.query_translation, name="Tool Input")
.on(Select.RecordCalls.agent.plan.args.kwargs.input)
.on(Select.RecordCalls.agent.plan.rets.tool_input)
)
# Tool Selection (custom) to evaluate the tool/task fit.
# First argument: the `input` kwarg passed to agent.plan; second: the `tool`
# attribute of what agent.plan returned.
f_tool_selection = (
Feedback(custom.tool_selection, name="Tool Selection")
.on(Select.RecordCalls.agent.plan.args.kwargs.input)
.on(Select.RecordCalls.agent.plan.rets.tool)
)
from trulens.apps.langchain import TruChain
# Wrap the agent in a TruChain recorder with both custom feedback functions attached.
tru_agent = TruChain(
agent,
app_name="Conversational_Agent",
feedbacks=[f_query_translation, f_tool_selection],
)
# Two prompts: the first asks for a summary, the second asks for multiple choice
# questions based on that summary.
user_prompts = [
"Please summarize the document to a short summary under 100 words",
"Give me 5 questions in multiple choice format based on the previous summary and give me their answers",
]
# Call the agent once per prompt inside the recorder's context and print each result.
with tru_agent as recording:
for prompt in user_prompts:
print(agent(prompt))
Run Trulens dashboard¶
from trulens.core import TruSession
from trulens.dashboard import run_dashboard
# Obtain a TruSession and launch the TruLens dashboard for it.
session = TruSession()
run_dashboard(session)