Cortex Chat + TruLens¶

This quickstart assumes you already have a running Cortex Search Service, a JWT token, and Cortex Chat Private Preview enabled for your account. If you need help getting started with Cortex Chat, or with having Cortex Chat Private Preview enabled, please reach out to your Snowflake account contact.

Install required packages¶

In [ ]:

Copied!

! pip install trulens-core trulens-providers-cortex trulens-connectors-snowflake snowflake-sqlalchemy
! pip install trulens-core trulens-providers-cortex trulens-connectors-snowflake snowflake-sqlalchemy

Set JWT Token, Chat URL, and Search Service¶

In [ ]:

Copied!





import os
# Cortex Chat settings for this notebook; replace each placeholder with your own value.
os.environ.update(
    {
        # Sent as the bearer token on every Cortex Chat request.
        "SNOWFLAKE_JWT": "...",
        # Full URL of the Cortex Chat endpoint.
        "SNOWFLAKE_CHAT_URL": ".../api/v2/cortex/chat",
        # Fully qualified name of the Cortex Search Service.
        "SNOWFLAKE_CORTEX_SEARCH_SERVICE": "<database>.<schema>.<cortex search service name>",
    }
)
import os
# Cortex Chat settings for this notebook; replace each placeholder with your own value.
os.environ["SNOWFLAKE_JWT"] = "..."
os.environ["SNOWFLAKE_CHAT_URL"] = ".../api/v2/cortex/chat"
# The search service must be given by its fully qualified name.
os.environ["SNOWFLAKE_CORTEX_SEARCH_SERVICE"] = "<database>.<schema>.<cortex search service name>"

Create a Cortex Chat App¶

The CortexChat class below can be configured with your URL and model selection.

It contains two methods: _handle_cortex_chat_response and chat.

_handle_cortex_chat_response handles the streaming response and exposes the debugging information.
chat is a user-facing method that allows you to input a query and receive a response and its citation.

In [ ]:

Copied!





import requests
import json
from trulens.apps.custom import instrument

class CortexChat:
    """
    Minimal client for the Cortex Chat REST endpoint.

    Both methods are wrapped with ``@instrument`` so that their arguments and
    return values are recorded by TruLens.
    """

    def __init__(self, url: str, cortex_search_service: str, model: str = "mistral-large"):
        """
        Initializes a new instance of the CortexChat class.
        Parameters:
            url (str): The URL of the chat service.
            cortex_search_service (str): The search service to be used for chat.
            model (str): The model to be used for chat. Defaults to "mistral-large".
        """
        self.url = url
        self.model = model
        self.cortex_search_service = cortex_search_service

    @instrument
    def _handle_cortex_chat_response(self, response: requests.Response) -> tuple[str, str, str]:
        """
        Process the streamed response from the Cortex Chat API.

        The stream is read line by line. Lines starting with ``data:`` carry a
        JSON payload whose ``delta.content`` items are typed ``text``,
        ``citation`` or ``debug_info``. Text chunks are echoed to stdout as
        they arrive and concatenated; the most recent citation and debug_info
        payloads are kept.

        Args:
            response: The response object from the Cortex Chat API.
        Returns:
            A tuple containing the extracted text, citation, and debug information
            from the response. Citation is "" when the stream carried none.
            NOTE(review): citation and debug_info are returned exactly as decoded
            from JSON, so they are presumably mappings rather than strings.
        Raises:
            RuntimeError: If the API sends an ``event: error`` message, if a
                ``data:`` line is not valid JSON, if ``event: done`` arrives
                before any debug information, or if the stream ends without
                an ``event: done`` marker.
        """

        text_parts = []
        citation = ""
        debug_info = ""
        previous_line = ""

        for line in response.iter_lines():
            if not line:
                # Blank lines only separate events in the stream.
                continue
            decoded_line = line.decode("utf-8")

            if decoded_line.startswith("event: done"):
                if debug_info == "":
                    raise RuntimeError("No debug information, required for TruLens feedback, provided by Cortex Chat API.")
                return "".join(text_parts), citation, debug_info

            if previous_line.startswith("event: error"):
                # The line following an error event carries the error payload.
                error_data = json.loads(decoded_line[5:])
                error_code = error_data["code"]
                error_message = error_data["message"]
                raise RuntimeError(f"Error event received from Cortex Chat API. Error code: {error_code}, Error message: {error_message}")

            if decoded_line.startswith("data:"):
                try:
                    data = json.loads(decoded_line[5:])
                except json.JSONDecodeError as err:
                    raise RuntimeError(f"Error decoding JSON: {decoded_line} from {previous_line}") from err

                # Handle every content item of the delta, not only the first one.
                for item in data.get("delta", {}).get("content", []):
                    item_type = item.get("type")
                    if item_type == "text":
                        chunk = item["text"]["value"]
                        print(chunk, end="")
                        text_parts.append(chunk)
                    elif item_type == "citation":
                        citation = item["citation"]
                    elif item_type == "debug_info":
                        debug_info = item["debug_info"]

            previous_line = decoded_line

        # Reaching this point means the stream was truncated; fail loudly instead
        # of implicitly returning None, which callers cannot unpack.
        raise RuntimeError("Cortex Chat API stream ended without an 'event: done' marker.")

    @instrument
    def chat(self, query: str) -> tuple[str, str]:
        """
        Sends a chat query to the Cortex Chat API and returns the response.

        The bearer token is read from the SNOWFLAKE_JWT environment variable
        at call time.

        Args:
            query (str): The chat query to send.
        Returns:
            tuple: A tuple containing the text response and citation when the
            API answers with HTTP 200. For any other status code the status
            and body are printed and None is returned.
        Raises:
            RuntimeError: Propagated from _handle_cortex_chat_response when
                the streamed response is an error, malformed or incomplete.
        Example:
            cortex = CortexChat(url, cortex_search_service)
            text, citation = cortex.chat("Hello, how are you?")
        """

        headers = {
            'X-Snowflake-Authorization-Token-Type': 'KEYPAIR_JWT',
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': f"Bearer {os.environ.get('SNOWFLAKE_JWT')}"
        }
        payload = {
            "query": query,
            "model": self.model,
            # Debug output is required: the response handler raises without it.
            "debug": True,
            "search_services": [{
                "name": self.cortex_search_service,
                "max_results": 10,
            }],
            "prompt": "{{.Question}} {{.Context}}",
        }

        # The body is streamed, so the connection stays open until it is closed;
        # the with-block releases it on every exit path.
        with requests.post(self.url, headers=headers, json=payload, stream=True) as response:
            if response.status_code != 200:
                print(f"Error: {response.status_code} - {response.text}")
                return None
            text, citation, _ = self._handle_cortex_chat_response(response)
            return text, citation

# Read the same variable names that the configuration cell sets
# (SNOWFLAKE_CORTEX_SEARCH_SERVICE, not SNOWFLAKE_SEARCH_SERVICE).
cortex = CortexChat(os.environ["SNOWFLAKE_CHAT_URL"], os.environ["SNOWFLAKE_CORTEX_SEARCH_SERVICE"])
import requests
import json
from trulens.apps.custom import instrument

class CortexChat:
    """
    Minimal client for the Cortex Chat REST endpoint.

    Both methods are wrapped with ``@instrument`` so that their arguments and
    return values are recorded by TruLens.
    """

    def __init__(self, url: str, cortex_search_service: str, model: str = "mistral-large"):
        """
        Initializes a new instance of the CortexChat class.
        Parameters:
            url (str): The URL of the chat service.
            cortex_search_service (str): The search service to be used for chat.
            model (str): The model to be used for chat. Defaults to "mistral-large".
        """
        self.url = url
        self.model = model
        self.cortex_search_service = cortex_search_service

    @instrument
    def _handle_cortex_chat_response(self, response: requests.Response) -> tuple[str, str, str]:
        """
        Process the streamed response from the Cortex Chat API.

        The stream is read line by line. Lines starting with ``data:`` carry a
        JSON payload whose ``delta.content`` items are typed ``text``,
        ``citation`` or ``debug_info``. Text chunks are echoed to stdout as
        they arrive and concatenated; the most recent citation and debug_info
        payloads are kept.

        Args:
            response: The response object from the Cortex Chat API.
        Returns:
            A tuple containing the extracted text, citation, and debug information
            from the response. Citation is "" when the stream carried none.
            NOTE(review): citation and debug_info are returned exactly as decoded
            from JSON, so they are presumably mappings rather than strings.
        Raises:
            RuntimeError: If the API sends an ``event: error`` message, if a
                ``data:`` line is not valid JSON, if ``event: done`` arrives
                before any debug information, or if the stream ends without
                an ``event: done`` marker.
        """

        text_parts = []
        citation = ""
        debug_info = ""
        previous_line = ""

        for line in response.iter_lines():
            if not line:
                # Blank lines only separate events in the stream.
                continue
            decoded_line = line.decode("utf-8")

            if decoded_line.startswith("event: done"):
                if debug_info == "":
                    raise RuntimeError("No debug information, required for TruLens feedback, provided by Cortex Chat API.")
                return "".join(text_parts), citation, debug_info

            if previous_line.startswith("event: error"):
                # The line following an error event carries the error payload.
                error_data = json.loads(decoded_line[5:])
                error_code = error_data["code"]
                error_message = error_data["message"]
                raise RuntimeError(f"Error event received from Cortex Chat API. Error code: {error_code}, Error message: {error_message}")

            if decoded_line.startswith("data:"):
                try:
                    data = json.loads(decoded_line[5:])
                except json.JSONDecodeError as err:
                    raise RuntimeError(f"Error decoding JSON: {decoded_line} from {previous_line}") from err

                # Handle every content item of the delta, not only the first one.
                for item in data.get("delta", {}).get("content", []):
                    item_type = item.get("type")
                    if item_type == "text":
                        chunk = item["text"]["value"]
                        print(chunk, end="")
                        text_parts.append(chunk)
                    elif item_type == "citation":
                        citation = item["citation"]
                    elif item_type == "debug_info":
                        debug_info = item["debug_info"]

            previous_line = decoded_line

        # Reaching this point means the stream was truncated; fail loudly instead
        # of implicitly returning None, which callers cannot unpack.
        raise RuntimeError("Cortex Chat API stream ended without an 'event: done' marker.")

    @instrument
    def chat(self, query: str) -> tuple[str, str]:
        """
        Sends a chat query to the Cortex Chat API and returns the response.

        The bearer token is read from the SNOWFLAKE_JWT environment variable
        at call time.

        Args:
            query (str): The chat query to send.
        Returns:
            tuple: A tuple containing the text response and citation when the
            API answers with HTTP 200. For any other status code the status
            and body are printed and None is returned.
        Raises:
            RuntimeError: Propagated from _handle_cortex_chat_response when
                the streamed response is an error, malformed or incomplete.
        Example:
            cortex = CortexChat(url, cortex_search_service)
            text, citation = cortex.chat("Hello, how are you?")
        """

        headers = {
            'X-Snowflake-Authorization-Token-Type': 'KEYPAIR_JWT',
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': f"Bearer {os.environ.get('SNOWFLAKE_JWT')}"
        }
        payload = {
            "query": query,
            "model": self.model,
            # Debug output is required: the response handler raises without it.
            "debug": True,
            "search_services": [{
                "name": self.cortex_search_service,
                "max_results": 10,
            }],
            "prompt": "{{.Question}} {{.Context}}",
        }

        # The body is streamed, so the connection stays open until it is closed;
        # the with-block releases it on every exit path.
        with requests.post(self.url, headers=headers, json=payload, stream=True) as response:
            if response.status_code != 200:
                print(f"Error: {response.status_code} - {response.text}")
                return None
            text, citation, _ = self._handle_cortex_chat_response(response)
            return text, citation

# Read the same variable names that the configuration cell sets
# (SNOWFLAKE_CORTEX_SEARCH_SERVICE, not SNOWFLAKE_SEARCH_SERVICE).
cortex = CortexChat(os.environ["SNOWFLAKE_CHAT_URL"], os.environ["SNOWFLAKE_CORTEX_SEARCH_SERVICE"])

Start a TruLens session¶

Start a TruLens session connected to Snowflake so we can log traces and evaluations in our Snowflake account.

Learn more about how to log traces and evaluations in Snowflake.

In [ ]:

Copied!





from trulens.core import TruSession
from trulens.connectors.snowflake import SnowflakeConnector

# Snowflake connection settings; replace every "..." placeholder with your own value.
connection_params = dict(
    account="...",
    user="...",
    password="...",
    database="...",
    schema="...",
    warehouse="...",
    role="...",
    init_server_side=False,
)

# The TruLens session logs through a connector built from the settings above.
connector = SnowflakeConnector(**connection_params)
session = TruSession(connector=connector)

# NOTE(review): presumably clears previously logged TruLens data - confirm
# before running against a schema that holds results you want to keep.
session.reset_database()
from trulens.core import TruSession
from trulens.connectors.snowflake import SnowflakeConnector

# Snowflake connection settings; replace every "..." placeholder with your own value.
connection_params = dict(
    account="...",
    user="...",
    password="...",
    database="...",
    schema="...",
    warehouse="...",
    role="...",
    init_server_side=False,
)

# The TruLens session logs through a connector built from the settings above.
connector = SnowflakeConnector(**connection_params)
session = TruSession(connector=connector)

# NOTE(review): presumably clears previously logged TruLens data - confirm
# before running against a schema that holds results you want to keep.
session.reset_database()

Create Feedback Functions¶

Here we initialize the RAG Triad to provide feedback on the Chat API responses.

If you'd like, you can also choose from a wide variety of stock feedback functions or even create custom feedback functions.

In [ ]:

Copied!





import numpy as np
from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.cortex import Cortex
from snowflake.snowpark.session import Session

# The feedback provider talks to Cortex through a Snowpark session built from
# the same connection settings.
snowpark_session = Session.builder.configs(connection_params).create()
provider = Cortex(snowpark_session, "llama3.1-8b")

# _handle_cortex_chat_response returns (text, citation, debug_info); index 2 is
# the debug information, whose "retrieved_results" entry holds the contexts.
retrieved_results_selector = Select.RecordCalls._handle_cortex_chat_response.rets[2]["retrieved_results"]

# Question/answer relevance between overall question and answer.
answer_relevance_feedback = Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
f_answer_relevance = answer_relevance_feedback.on_input().on_output()

# Groundedness of the answer with respect to the retrieved results.
# NOTE(review): .collect() presumably passes all results as a single argument - confirm.
groundedness_feedback = Feedback(provider.groundedness_measure_with_cot_reasons, name="Groundedness")
f_groundedness = groundedness_feedback.on(retrieved_results_selector.collect()).on_output()

# Context relevance between question and each context chunk.
context_relevance_feedback = Feedback(provider.context_relevance_with_cot_reasons, name="Context Relevance")
f_context_relevance = (
    context_relevance_feedback
    .on_input()
    .on(retrieved_results_selector[:])
    .aggregate(np.mean)  # choose a different aggregation method if you wish
)
import numpy as np
from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.cortex import Cortex
from snowflake.snowpark.session import Session

# The feedback provider talks to Cortex through a Snowpark session built from
# the same connection settings.
snowpark_session = Session.builder.configs(connection_params).create()
provider = Cortex(snowpark_session, "llama3.1-8b")

# _handle_cortex_chat_response returns (text, citation, debug_info); index 2 is
# the debug information, whose "retrieved_results" entry holds the contexts.
retrieved_results_selector = Select.RecordCalls._handle_cortex_chat_response.rets[2]["retrieved_results"]

# Question/answer relevance between overall question and answer.
answer_relevance_feedback = Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
f_answer_relevance = answer_relevance_feedback.on_input().on_output()

# Groundedness of the answer with respect to the retrieved results.
# NOTE(review): .collect() presumably passes all results as a single argument - confirm.
groundedness_feedback = Feedback(provider.groundedness_measure_with_cot_reasons, name="Groundedness")
f_groundedness = groundedness_feedback.on(retrieved_results_selector.collect()).on_output()

# Context relevance between question and each context chunk.
context_relevance_feedback = Feedback(provider.context_relevance_with_cot_reasons, name="Context Relevance")
f_context_relevance = (
    context_relevance_feedback
    .on_input()
    .on(retrieved_results_selector[:])
    .aggregate(np.mean)  # choose a different aggregation method if you wish
)

Initialize the TruLens recorder and run the app¶

In [ ]:

Copied!





from trulens.apps.custom import TruCustomApp

# The three feedback functions evaluated for every recorded call.
rag_triad_feedbacks = [f_answer_relevance, f_groundedness, f_context_relevance]

# Wrap the chat app so that calls made inside the recording context are logged.
tru_recorder = TruCustomApp(
    cortex,
    app_name="Cortex Chat",
    app_version="mistral-large",
    feedbacks=rag_triad_feedbacks,
)

# Example usage
user_query = "Hello! What kind of service does Gregory have?"
with tru_recorder as recording:
    cortex.chat(user_query)
from trulens.apps.custom import TruCustomApp

# The three feedback functions evaluated for every recorded call.
rag_triad_feedbacks = [f_answer_relevance, f_groundedness, f_context_relevance]

# Wrap the chat app so that calls made inside the recording context are logged.
tru_recorder = TruCustomApp(
    cortex,
    app_name="Cortex Chat",
    app_version="mistral-large",
    feedbacks=rag_triad_feedbacks,
)

# Example usage
user_query = "Hello! What kind of service does Gregory have?"
with tru_recorder as recording:
    cortex.chat(user_query)

Start the dashboard¶

In [ ]:

Copied!

from trulens.dashboard import run_dashboard

# Start the TruLens dashboard for the session created earlier in this notebook.
run_dashboard(session)
from trulens.dashboard import run_dashboard

# Start the TruLens dashboard for the session created earlier in this notebook.
run_dashboard(session)