LangChain retrieval agent¶

In this notebook, we are building a LangChain agent to take in user input and figure out the best tool(s) to use via chain of thought (CoT) reasoning.

Given that the tools defined for our agent cover more than one distinct task — summarization, and the generation of multiple-choice questions with their corresponding answers, which is closer to traditional Natural Language Understanding (NLU) — we will use two key evaluations for our agent: Tool Input and Tool Selection. Both will be defined with custom functions.

In [ ]:

Copied!

# !pip install trulens trulens-providers-openai trulens-apps-langchain langchain==0.0.335 unstructured==0.10.23 chromadb==0.4.14
# !pip install trulens trulens-providers-openai trulens-apps-langchain langchain==0.0.335 unstructured==0.10.23 chromadb==0.4.14

In [ ]:

Copied!





import os

from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Placeholder credential: replace "sk-..." with a real OpenAI API key before running.
# Avoid committing a real key to version control.
os.environ["OPENAI_API_KEY"] = "sk-..."
import os

from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Placeholder credential: replace "sk-..." with a real OpenAI API key before running.
# Avoid committing a real key to version control.
os.environ["OPENAI_API_KEY"] = "sk-..."

Define custom class that loads documents into local vector store.¶

We are using Chroma, one of the open-source embedding database offerings, in the following example.

In [ ]:

Copied!





class VectorstoreManager:
    """Hold the Chroma vectorstore for one conversation and every chunk loaded into it."""

    def __init__(self):
        # Chunks accumulated from all documents added during the conversation.
        self.all_document_splits = []
        # Remains None until a store is initialized or documents are added.
        self.vectorstore = None

    def initialize_vectorstore(self):
        """Begin a new conversation with an empty Chroma store and no tracked chunks.

        Returns:
            The newly created, empty Chroma store.
        """
        fresh_store = Chroma(embedding_function=OpenAIEmbeddings())
        self.vectorstore = fresh_store
        self.all_document_splits = []
        return fresh_store

    def add_documents_to_vectorstore(self, url_lst: list):
        """Load the given web pages and rebuild the store from all chunks seen so far.

        Args:
            url_lst: URLs of the documents to fetch and split.

        Returns:
            A new Chroma store built from every chunk accumulated on this
            manager, including chunks from earlier calls.
        """
        for source_url in url_lst:
            self.all_document_splits += self.load_and_split_document(source_url)

        # The store is rebuilt from the full chunk list rather than updated in place.
        rebuilt_store = Chroma.from_documents(
            documents=self.all_document_splits,
            embedding=OpenAIEmbeddings(),
        )
        self.vectorstore = rebuilt_store
        return rebuilt_store

    def get_vectorstore(self):
        """Return the current vectorstore.

        Raises:
            ValueError: If no store has been created yet; this method does not
                create one on the caller's behalf.
        """
        current_store = self.vectorstore
        if current_store is not None:
            return current_store
        raise ValueError(
            "Vectorstore is not initialized. Please initialize it first."
        )

    @staticmethod
    def load_and_split_document(url: str, chunk_size=1000, chunk_overlap=0):
        """Fetch the page at ``url`` and return it split into chunks.

        Args:
            url: Address of the web page to load.
            chunk_size: Passed to the text splitter as ``chunk_size``.
            chunk_overlap: Passed to the text splitter as ``chunk_overlap``.
        """
        loader = WebBaseLoader(url)
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        return loader.load_and_split(splitter)
class VectorstoreManager:
    """Hold the Chroma vectorstore for one conversation and every chunk loaded into it."""

    def __init__(self):
        # Chunks accumulated from all documents added during the conversation.
        self.all_document_splits = []
        # Remains None until a store is initialized or documents are added.
        self.vectorstore = None

    def initialize_vectorstore(self):
        """Begin a new conversation with an empty Chroma store and no tracked chunks.

        Returns:
            The newly created, empty Chroma store.
        """
        fresh_store = Chroma(embedding_function=OpenAIEmbeddings())
        self.vectorstore = fresh_store
        self.all_document_splits = []
        return fresh_store

    def add_documents_to_vectorstore(self, url_lst: list):
        """Load the given web pages and rebuild the store from all chunks seen so far.

        Args:
            url_lst: URLs of the documents to fetch and split.

        Returns:
            A new Chroma store built from every chunk accumulated on this
            manager, including chunks from earlier calls.
        """
        for source_url in url_lst:
            self.all_document_splits += self.load_and_split_document(source_url)

        # The store is rebuilt from the full chunk list rather than updated in place.
        rebuilt_store = Chroma.from_documents(
            documents=self.all_document_splits,
            embedding=OpenAIEmbeddings(),
        )
        self.vectorstore = rebuilt_store
        return rebuilt_store

    def get_vectorstore(self):
        """Return the current vectorstore.

        Raises:
            ValueError: If no store has been created yet; this method does not
                create one on the caller's behalf.
        """
        current_store = self.vectorstore
        if current_store is not None:
            return current_store
        raise ValueError(
            "Vectorstore is not initialized. Please initialize it first."
        )

    @staticmethod
    def load_and_split_document(url: str, chunk_size=1000, chunk_overlap=0):
        """Fetch the page at ``url`` and return it split into chunks.

        Args:
            url: Address of the web page to load.
            chunk_size: Passed to the text splitter as ``chunk_size``.
            chunk_overlap: Passed to the text splitter as ``chunk_overlap``.
        """
        loader = WebBaseLoader(url)
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        return loader.load_and_split(splitter)

In [ ]:

Copied!

# Web page that is fetched, split into chunks, and embedded into the vector store.
DOC_URL = "http://paulgraham.com/worked.html"

# add_documents_to_vectorstore returns the Chroma store it builds, so vec_store
# is the handle the retrieval chains later call .as_retriever() on.
vectorstore_manager = VectorstoreManager()
vec_store = vectorstore_manager.add_documents_to_vectorstore([DOC_URL])
# Web page that is fetched, split into chunks, and embedded into the vector store.
DOC_URL = "http://paulgraham.com/worked.html"

# add_documents_to_vectorstore returns the Chroma store it builds, so vec_store
# is the handle the retrieval chains later call .as_retriever() on.
vectorstore_manager = VectorstoreManager()
vec_store = vectorstore_manager.add_documents_to_vectorstore([DOC_URL])

Set up conversational agent with multiple tools.¶

The tools are then selected based on the match between their names/descriptions and the user input, for document retrieval, summarization, and generation of question-answering pairs.

In [ ]:

Copied!





# Chat model shared by the memory summarizer, both retrieval chains, and the agent.
llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.0)

# Conversation memory exposed to the agent under the "chat_history" key.
# No `k` argument is passed: that is a ConversationBufferWindowMemory option,
# not a field of ConversationSummaryBufferMemory, which bounds history with
# max_token_limit instead.
conversational_memory = ConversationSummaryBufferMemory(
    max_token_limit=64,
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

# Prompt for the summarization / document Q&A tool. {context} receives the
# retrieved chunks and {question} the tool input.
retrieval_summarization_template = """
System: Follow these instructions below in all your responses:
System: always try to retrieve documents as knowledge base or external data source from retriever (vector DB). 
System: If performing summarization, you will try to be as accurate and informational as possible.
System: If providing a summary/key takeaways/highlights, make sure the output is numbered as bullet points.
If you don't understand the source document or cannot find sufficient relevant context, be sure to ask me for more context information.
{context}
Question: {question}
Action:
"""
# Prompt for the multiple-choice question generator; same placeholders as above.
question_generation_template = """
System: Based on the summarized context, you are expected to generate a specified number of multiple choice questions and their answers from the context to ensure understanding. Each question, unless specified otherwise, is expected to have 4 options and only one correct answer.
System: Questions should be in the format of numbered list.
{context}
Question: {question}
Action:
"""

summarization_prompt = PromptTemplate(
    template=retrieval_summarization_template,
    input_variables=["question", "context"],
)
question_generator_prompt = PromptTemplate(
    template=question_generation_template,
    input_variables=["question", "context"],
)

# Two "stuff" RetrievalQA chains over the same vector store; they differ only
# in the prompt that wraps the retrieved context.
summarization_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vec_store.as_retriever(),
    chain_type_kwargs={"prompt": summarization_prompt},
)

question_answering_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vec_store.as_retriever(),
    chain_type_kwargs={"prompt": question_generator_prompt},
)


# Tools offered to the agent; each one runs one of the chains above.
# NOTE(review): the second tool's description talks about holding a
# conversation rather than generating questions, so only its name tells the
# agent what it does — confirm whether this mismatch is intentional.
tools = [
    Tool(
        name="Knowledge Base / retrieval from documents",
        func=summarization_chain.run,
        description="useful for when you need to answer questions about the source document(s).",
    ),
    Tool(
        name="Conversational agent to generate multiple choice questions and their answers about the summary of the source document(s)",
        func=question_answering_chain.run,
        description="useful for when you need to have a conversation with a human and hold the memory of the current / previous conversation.",
    ),
]
# Conversational ReAct-style chat agent wired to the tools and the memory above.
agent = initialize_agent(
    agent="chat-conversational-react-description",
    tools=tools,
    llm=llm,
    memory=conversational_memory,
)
# Chat model shared by the memory summarizer, both retrieval chains, and the agent.
llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.0)

# Conversation memory exposed to the agent under the "chat_history" key.
# No `k` argument is passed: that is a ConversationBufferWindowMemory option,
# not a field of ConversationSummaryBufferMemory, which bounds history with
# max_token_limit instead.
conversational_memory = ConversationSummaryBufferMemory(
    max_token_limit=64,
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

# Prompt for the summarization / document Q&A tool. {context} receives the
# retrieved chunks and {question} the tool input.
retrieval_summarization_template = """
System: Follow these instructions below in all your responses:
System: always try to retrieve documents as knowledge base or external data source from retriever (vector DB). 
System: If performing summarization, you will try to be as accurate and informational as possible.
System: If providing a summary/key takeaways/highlights, make sure the output is numbered as bullet points.
If you don't understand the source document or cannot find sufficient relevant context, be sure to ask me for more context information.
{context}
Question: {question}
Action:
"""
# Prompt for the multiple-choice question generator; same placeholders as above.
question_generation_template = """
System: Based on the summarized context, you are expected to generate a specified number of multiple choice questions and their answers from the context to ensure understanding. Each question, unless specified otherwise, is expected to have 4 options and only one correct answer.
System: Questions should be in the format of numbered list.
{context}
Question: {question}
Action:
"""

summarization_prompt = PromptTemplate(
    template=retrieval_summarization_template,
    input_variables=["question", "context"],
)
question_generator_prompt = PromptTemplate(
    template=question_generation_template,
    input_variables=["question", "context"],
)

# Two "stuff" RetrievalQA chains over the same vector store; they differ only
# in the prompt that wraps the retrieved context.
summarization_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vec_store.as_retriever(),
    chain_type_kwargs={"prompt": summarization_prompt},
)

question_answering_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vec_store.as_retriever(),
    chain_type_kwargs={"prompt": question_generator_prompt},
)


# Tools offered to the agent; each one runs one of the chains above.
# NOTE(review): the second tool's description talks about holding a
# conversation rather than generating questions, so only its name tells the
# agent what it does — confirm whether this mismatch is intentional.
tools = [
    Tool(
        name="Knowledge Base / retrieval from documents",
        func=summarization_chain.run,
        description="useful for when you need to answer questions about the source document(s).",
    ),
    Tool(
        name="Conversational agent to generate multiple choice questions and their answers about the summary of the source document(s)",
        func=question_answering_chain.run,
        description="useful for when you need to have a conversation with a human and hold the memory of the current / previous conversation.",
    ),
]
# Conversational ReAct-style chat agent wired to the tools and the memory above.
agent = initialize_agent(
    agent="chat-conversational-react-description",
    tools=tools,
    llm=llm,
    memory=conversational_memory,
)

Set up Evaluation¶

In [ ]:

Copied!

from trulens.core import TruSession

# Session object through which TruLens state is managed in this notebook.
session = TruSession()

# NOTE(review): presumably clears previously logged apps/records so this run
# starts from a clean slate — confirm before pointing at a shared database.
session.reset_database()
from trulens.core import TruSession

# Session object through which TruLens state is managed in this notebook.
session = TruSession()

# NOTE(review): presumably clears previously logged apps/records so this run
# starts from a clean slate — confirm before pointing at a shared database.
session.reset_database()

In [ ]:

Copied!

from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.openai import OpenAI as fOpenAI
from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.openai import OpenAI as fOpenAI

In [ ]:

Copied!





class OpenAI_custom(fOpenAI):
    """OpenAI feedback provider extended with two LLM-graded custom feedback functions.

    Both functions ask the chat model for a 0-10 rating and return it rescaled
    to the 0.0-1.0 range.
    """

    def _rate_0_to_10(self, system_prompt: str, user_prompt: str) -> float:
        """Ask the chat model for a 0-10 rating and return it scaled to [0.0, 1.0].

        Args:
            system_prompt: Grading instructions sent as the system message.
            user_prompt: The items to grade, sent as the user message.

        Returns:
            The first number found in the model's reply, clamped to 0-10 and
            divided by 10.

        Raises:
            ValueError: If the reply contains no number at all.
        """
        import re  # local import keeps this class self-contained

        response = self.endpoint.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        reply = response.choices[0].message.content or ""

        # The model is told to answer with the number only, but replies such as
        # "8/10" or "Rating: 8" do occur; take the first number instead of
        # letting float() fail on the raw text.
        found = re.search(r"-?\d+(?:\.\d+)?", reply)
        if found is None:
            raise ValueError(
                f"Expected a 0-10 rating from the model, got: {reply!r}"
            )

        # Clamp so an out-of-range reply cannot produce a score outside [0, 1].
        rating = min(max(float(found.group()), 0.0), 10.0)
        return rating / 10

    def query_translation(self, question1: str, question2: str) -> float:
        """Rate how similar two questions are (0.0 = distinct, 1.0 = identical).

        Args:
            question1: First question to compare.
            question2: Second question to compare.
        """
        return self._rate_0_to_10(
            "Your job is to rate how similar two questions are on a scale of 0 to 10, where 0 is completely distinct and 10 is matching exactly. Respond with the number only.",
            f"QUESTION 1: {question1}; QUESTION 2: {question2}",
        )

    def tool_selection(self, task: str, tool: str) -> float:
        """Rate how well ``tool`` fits ``task`` (0.0 = wrong tool, 1.0 = perfect tool).

        Args:
            task: Description of the task to perform.
            tool: Name of the tool that was chosen for it.
        """
        return self._rate_0_to_10(
            "Your job is to rate if the TOOL is the right tool for the TASK, where 0 is the wrong tool and 10 is the perfect tool. Respond with the number only.",
            f"TASK: {task}; TOOL: {tool}",
        )


# Provider instance whose bound methods implement the two feedback functions below.
custom = OpenAI_custom()

# Query translation feedback (custom) to evaluate the similarity between the user's original question and the question generated by the agent after paraphrasing.
# It compares the `input` keyword argument of agent.plan with the `tool_input` field of what agent.plan returns.
f_query_translation = (
    Feedback(custom.query_translation, name="Tool Input")
    .on(Select.RecordCalls.agent.plan.args.kwargs.input)
    .on(Select.RecordCalls.agent.plan.rets.tool_input)
)

# Tool Selection (custom) to evaluate the tool/task fit.
# It pairs that same `input` argument with the `tool` field of what agent.plan returns.
f_tool_selection = (
    Feedback(custom.tool_selection, name="Tool Selection")
    .on(Select.RecordCalls.agent.plan.args.kwargs.input)
    .on(Select.RecordCalls.agent.plan.rets.tool)
)
class OpenAI_custom(fOpenAI):
    """OpenAI feedback provider extended with two LLM-graded custom feedback functions.

    Both functions ask the chat model for a 0-10 rating and return it rescaled
    to the 0.0-1.0 range.
    """

    def _rate_0_to_10(self, system_prompt: str, user_prompt: str) -> float:
        """Ask the chat model for a 0-10 rating and return it scaled to [0.0, 1.0].

        Args:
            system_prompt: Grading instructions sent as the system message.
            user_prompt: The items to grade, sent as the user message.

        Returns:
            The first number found in the model's reply, clamped to 0-10 and
            divided by 10.

        Raises:
            ValueError: If the reply contains no number at all.
        """
        import re  # local import keeps this class self-contained

        response = self.endpoint.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        reply = response.choices[0].message.content or ""

        # The model is told to answer with the number only, but replies such as
        # "8/10" or "Rating: 8" do occur; take the first number instead of
        # letting float() fail on the raw text.
        found = re.search(r"-?\d+(?:\.\d+)?", reply)
        if found is None:
            raise ValueError(
                f"Expected a 0-10 rating from the model, got: {reply!r}"
            )

        # Clamp so an out-of-range reply cannot produce a score outside [0, 1].
        rating = min(max(float(found.group()), 0.0), 10.0)
        return rating / 10

    def query_translation(self, question1: str, question2: str) -> float:
        """Rate how similar two questions are (0.0 = distinct, 1.0 = identical).

        Args:
            question1: First question to compare.
            question2: Second question to compare.
        """
        return self._rate_0_to_10(
            "Your job is to rate how similar two questions are on a scale of 0 to 10, where 0 is completely distinct and 10 is matching exactly. Respond with the number only.",
            f"QUESTION 1: {question1}; QUESTION 2: {question2}",
        )

    def tool_selection(self, task: str, tool: str) -> float:
        """Rate how well ``tool`` fits ``task`` (0.0 = wrong tool, 1.0 = perfect tool).

        Args:
            task: Description of the task to perform.
            tool: Name of the tool that was chosen for it.
        """
        return self._rate_0_to_10(
            "Your job is to rate if the TOOL is the right tool for the TASK, where 0 is the wrong tool and 10 is the perfect tool. Respond with the number only.",
            f"TASK: {task}; TOOL: {tool}",
        )


# Provider instance whose bound methods implement the two feedback functions below.
custom = OpenAI_custom()

# Query translation feedback (custom) to evaluate the similarity between the user's original question and the question generated by the agent after paraphrasing.
# It compares the `input` keyword argument of agent.plan with the `tool_input` field of what agent.plan returns.
f_query_translation = (
    Feedback(custom.query_translation, name="Tool Input")
    .on(Select.RecordCalls.agent.plan.args.kwargs.input)
    .on(Select.RecordCalls.agent.plan.rets.tool_input)
)

# Tool Selection (custom) to evaluate the tool/task fit.
# It pairs that same `input` argument with the `tool` field of what agent.plan returns.
f_tool_selection = (
    Feedback(custom.tool_selection, name="Tool Selection")
    .on(Select.RecordCalls.agent.plan.args.kwargs.input)
    .on(Select.RecordCalls.agent.plan.rets.tool)
)

In [ ]:

Copied!





from trulens.apps.langchain import TruChain

# Wrap the agent in a TruChain recorder registered as "Conversational_Agent",
# with both custom feedback functions attached. It is used as a context
# manager when the prompts are run.
tru_agent = TruChain(
    agent,
    app_name="Conversational_Agent",
    feedbacks=[f_query_translation, f_tool_selection],
)
from trulens.apps.langchain import TruChain

# Wrap the agent in a TruChain recorder registered as "Conversational_Agent",
# with both custom feedback functions attached. It is used as a context
# manager when the prompts are run.
tru_agent = TruChain(
    agent,
    app_name="Conversational_Agent",
    feedbacks=[f_query_translation, f_tool_selection],
)

In [ ]:

Copied!





# The second prompt refers to "the previous summary", so the prompts must run
# in this order.
user_prompts = [
    "Please summarize the document to a short summary under 100 words",
    "Give me 5 questions in multiple choice format based on the previous summary and give me their answers",
]

# The underlying agent is called directly while the TruChain recorder context
# is active; each result is printed as returned.
with tru_agent as recording:
    for prompt in user_prompts:
        print(agent(prompt))
# The second prompt refers to "the previous summary", so the prompts must run
# in this order.
user_prompts = [
    "Please summarize the document to a short summary under 100 words",
    "Give me 5 questions in multiple choice format based on the previous summary and give me their answers",
]

# The underlying agent is called directly while the TruChain recorder context
# is active; each result is printed as returned.
with tru_agent as recording:
    for prompt in user_prompts:
        print(agent(prompt))

Run Trulens dashboard¶

In [ ]:

Copied!

from trulens.core import TruSession
from trulens.dashboard import run_dashboard

# Create a TruSession handle and pass it to the dashboard launcher.
# NOTE(review): presumably this resolves to the same session/database used for
# the recording above — confirm.
session = TruSession()
run_dashboard(session)
from trulens.core import TruSession
from trulens.dashboard import run_dashboard

# Create a TruSession handle and pass it to the dashboard launcher.
# NOTE(review): presumably this resolves to the same session/database used for
# the recording above — confirm.
session = TruSession()
run_dashboard(session)