In [5]:
# by nitta
import os
is_colab = 'google.colab' in str(get_ipython())   # for Google Colab

if is_colab:
    from google.colab import drive
    drive.mount('/content/drive')
    SAVE_PATH='/content/drive/MyDrive/DeepLearning/llm_book2'
else:
    SAVE_PATH='.'

# # variable definitions in 'account.py' to access the HuggingFace Hub and other APIs
# MyAccount = {
#     'HuggingFace': {
#         'user': 'YourUserName',
#         'token': 'YourTokenWithWritePermission'
#     },
#     'OpenAI': {
#         'api-key': 'YourOpenAI_API_Key'
#     },
#     'Tavily': {
#         'api-key': 'YourTavily_API_Key'   # used by the Tavily search tool later in this notebook
#     }
# }
ACCOUNT_FILE = os.path.join(SAVE_PATH, 'account.py')
%run {ACCOUNT_FILE}             # set 'MyAccount' variable
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

LangChain

LangChain creates applications that carry out complex tasks by combining various modules; such a combination of modules is called a Chain.

The Agent module infers what the user's input is really asking for, then selects and runs the necessary tools (= acting) to arrive at the best possible answer.

If an Agent judges its answer to be insufficient, it repeats this cycle of reasoning and acting, which lets it carry out complex tasks with high accuracy.

Quickstart

This quickstart covers:

  • Setting up LangChain, LangSmith, and LangServe
  • Using the LangChain Expression Language, the protocol for chaining components together (LangChain itself is built on this protocol)
  • Building a simple application with LangChain
  • Tracing an application with LangSmith
  • Serving an application with LangServe

Setup

In [6]:
! pip install langchain
Collecting langchain
  Downloading langchain-0.1.11-py3-none-any.whl (807 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 807.5/807.5 kB 3.8 MB/s eta 0:00:00
Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)
Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.28)
Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.3)
Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.25 (from langchain)
  Downloading langchain_community-0.0.27-py3-none-any.whl (1.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 26.9 MB/s eta 0:00:00
Collecting langchain-core<0.2,>=0.1.29 (from langchain)
  Downloading langchain_core-0.1.30-py3-none-any.whl (256 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 256.9/256.9 kB 34.7 MB/s eta 0:00:00
Collecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.23-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.6/66.6 kB 7.2 MB/s eta 0:00:00
Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)
Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.6.3)
Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)
Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.3)
Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)
Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)
Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)
Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)
Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)
  Downloading marshmallow-3.21.1-py3-none-any.whl (49 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 49.4/49.4 kB 6.9 MB/s eta 0:00:00
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain)
  Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)
Requirement already satisfied: anyio<5,>=3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2,>=0.1.29->langchain) (3.7.1)
Requirement already satisfied: packaging<24.0,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2,>=0.1.29->langchain) (23.2)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading orjson-3.9.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 138.5/138.5 kB 19.2 MB/s eta 0:00:00
Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.6.0)
Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.16.3)
Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.10.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2024.2.2)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)
Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1.29->langchain) (1.3.1)
Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1.29->langchain) (1.2.0)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: orjson, mypy-extensions, marshmallow, jsonpointer, typing-inspect, jsonpatch, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain-community, langchain
Successfully installed dataclasses-json-0.6.4 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.11 langchain-community-0.0.27 langchain-core-0.1.30 langchain-text-splitters-0.0.1 langsmith-0.1.23 marshmallow-3.21.1 mypy-extensions-1.0.0 orjson-3.9.15 typing-inspect-0.9.0

Applications built with LangChain typically involve multiple steps with multiple LLM calls. The best way to inspect what is happening inside a chain or agent is to use LangSmith.

To use LangSmith, set the following environment variables:

export LANGCHAIN_TRACING_V2="true"
export LANGCHAIN_API_KEY="..."
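These can also be set from inside the notebook rather than the shell; a minimal sketch (the key below is a placeholder, and you could equally keep it in account.py like the other credentials):

import os

# Optional: enable LangSmith tracing for this session.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "YourLangSmith_API_Key"   # placeholder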

Building with LangChain

LangChain makes it possible to build applications that connect LLMs to external data sources and computation. This quickstart demonstrates a few ways to do that. We start with a simple LLM chain that relies only on a prompt template to produce a response. Next we build a chain that fetches data from a separate database and passes it into the prompt template. Then we add chat history to build a conversational retrieval chain, which remembers previous questions and lets you interact with the LLM in a chat style. Finally, we build an agent that uses an LLM to decide whether it needs to fetch data in order to answer a question. Each topic is covered only at a high level; links to the relevant documentation are included for those who want the details.

LLM Chain: OpenAI

In [7]:
#
! pip install langchain-openai
Collecting langchain-openai
  Downloading langchain_openai-0.0.8-py3-none-any.whl (32 kB)
Requirement already satisfied: langchain-core<0.2.0,>=0.1.27 in /usr/local/lib/python3.10/dist-packages (from langchain-openai) (0.1.30)
Collecting openai<2.0.0,>=1.10.0 (from langchain-openai)
  Downloading openai-1.13.3-py3-none-any.whl (227 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 227.4/227.4 kB 1.5 MB/s eta 0:00:00
Collecting tiktoken<1,>=0.5.2 (from langchain-openai)
  Downloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 2.4 MB/s eta 0:00:00
Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (6.0.1)
Requirement already satisfied: anyio<5,>=3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (3.7.1)
Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (1.33)
Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (0.1.23)
Requirement already satisfied: packaging<24.0,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (23.2)
Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (2.6.3)
Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (2.31.0)
Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2.0,>=0.1.27->langchain-openai) (8.2.3)
Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.10.0->langchain-openai) (1.7.0)
Collecting httpx<1,>=0.23.0 (from openai<2.0.0,>=1.10.0->langchain-openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 75.6/75.6 kB 2.8 MB/s eta 0:00:00
Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.10.0->langchain-openai) (1.3.1)
Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.10.0->langchain-openai) (4.66.2)
Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.10.0->langchain-openai) (4.10.0)
Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken<1,>=0.5.2->langchain-openai) (2023.12.25)
Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3->langchain-core<0.2.0,>=0.1.27->langchain-openai) (3.6)
Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3->langchain-core<0.2.0,>=0.1.27->langchain-openai) (1.2.0)
Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.10.0->langchain-openai) (2024.2.2)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai<2.0.0,>=1.10.0->langchain-openai)
  Downloading httpcore-1.0.4-py3-none-any.whl (77 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.8/77.8 kB 2.7 MB/s eta 0:00:00
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.10.0->langchain-openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.3/58.3 kB 2.3 MB/s eta 0:00:00
Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.2.0,>=0.1.27->langchain-openai) (2.4)
Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.0->langchain-core<0.2.0,>=0.1.27->langchain-openai) (3.9.15)
Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.2.0,>=0.1.27->langchain-openai) (0.6.0)
Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.2.0,>=0.1.27->langchain-openai) (2.16.3)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-core<0.2.0,>=0.1.27->langchain-openai) (3.3.2)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-core<0.2.0,>=0.1.27->langchain-openai) (2.0.7)
Installing collected packages: h11, tiktoken, httpcore, httpx, openai, langchain-openai
Successfully installed h11-0.14.0 httpcore-1.0.4 httpx-0.27.0 langchain-openai-0.0.8 openai-1.13.3 tiktoken-0.6.0
In [8]:
import os
os.environ["OPENAI_API_KEY"] = MyAccount["OpenAI"]["api-key"]
In [9]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()
# llm = ChatOpenAI(openai_api_key=MyAccount["OpenAI"]["api-key"])
In [10]:
llm.invoke("how can langsmith help with testing?")
Out[10]:
AIMessage(content='Langsmith can help with testing by providing automated testing capabilities, such as unit testing and integration testing. It can also help by generating test cases based on grammar and syntax rules, and by providing tools for analyzing test coverage and identifying potential issues in the code. Additionally, Langsmith can be used to simulate various scenarios and test edge cases, helping to ensure the reliability and quality of the software being developed.')


In [11]:
# A prompt template can be used to steer the response
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are world class technical documentation writer."),
    ("user", "{input}")
])
In [12]:
# Combine these into a simple LLM chain.
# In Python, '|' is shorthand for calling the object's __or__ method.

chain = prompt | llm
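
The piped chain simply feeds each step's output into the next step. Invoking it is roughly equivalent to calling the steps by hand; a sketch of that equivalence (illustrative, not the actual LCEL internals):

# Roughly what chain.invoke({"input": ...}) does:
messages = prompt.invoke({"input": "how can langsmith help with testing?"})   # -> ChatPromptValue
response = llm.invoke(messages)                                               # -> AIMessage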
In [13]:
# Send the same question as to the plain llm above; this time it should answer as a technical writer.
chain.invoke({"input": "how can langsmith help with testing?"})
Out[13]:
AIMessage(content='Langsmith can help with testing in several ways:\n\n1. Automated Testing: Langsmith can be used to generate test data for automated testing scripts. By creating realistic and diverse test data sets, Langsmith can help improve the coverage and effectiveness of automated tests.\n\n2. Performance Testing: Langsmith can generate large volumes of data to simulate real-world conditions and stress test applications. This can help identify performance bottlenecks and potential issues under heavy loads.\n\n3. Security Testing: Langsmith can generate various types of data to test the security of applications, such as SQL injection attacks, cross-site scripting, and other vulnerabilities. By providing realistic test data, Langsmith can help uncover security weaknesses in applications.\n\n4. User Acceptance Testing: Langsmith can generate test data that closely resembles real user behavior, allowing testers to validate the functionality and usability of applications from an end-user perspective.\n\n5. Data Validation: Langsmith can be used to validate the accuracy and integrity of data in databases or applications by generating test data sets and comparing the results with expected outcomes.\n\nOverall, Langsmith can be a valuable tool in the testing process by providing high-quality test data, enabling comprehensive testing coverage, and improving the overall quality of software applications.')


In [14]:
# The output of a ChatModel is a message, but it is often more convenient to work with a plain string.
# A simple output parser can convert the message into a string.

from langchain_core.output_parsers import StrOutputParser

chain = prompt | llm | StrOutputParser()
In [15]:
chain.invoke({"input": "how can langsmith help with testing?"})
Out[15]:
'Langsmith is a powerful tool that can greatly assist in testing by automating various aspects of the testing process. Here are some ways Langsmith can help with testing:\n\n1. Automated Test Generation: Langsmith can automatically generate test cases based on the specifications provided, saving time and effort in creating test cases manually.\n\n2. Code Analysis: Langsmith can analyze the code under test to identify potential issues such as code smells, bugs, or vulnerabilities, helping testers focus on areas that require attention.\n\n3. Test Execution: Langsmith can execute test cases automatically, providing quick feedback on the quality of the code and identifying any failures or regressions.\n\n4. Integration with Testing Frameworks: Langsmith can integrate with popular testing frameworks like JUnit, TestNG, or Selenium, making it easier to incorporate automated testing into the development process.\n\n5. Continuous Integration: Langsmith can be integrated into continuous integration pipelines to automate testing as part of the build process, ensuring that new code changes are thoroughly tested before deployment.\n\nOverall, Langsmith can streamline the testing process, improve test coverage, and help identify issues early in the development cycle, ultimately leading to a more robust and reliable software product.'

We have now covered how to set up a basic LLM chain. This only touches on the basics of prompts, models, and output parsers; for more detail, see the Model I/O documentation.

Retrieval Chain

To answer the original question ("how can langsmith help with testing?") properly, we need to give the LLM additional context. We can do this with retrieval. Retrieval is useful when there is too much data to pass to the LLM directly; a retriever fetches only the most relevant pieces and passes those in.

In this step we look up relevant documents with a Retriever and pass them into the prompt. A retriever can be backed by anything (a SQL table, the internet, and so on), but here we will set up a vector store and use it as the retriever. For details, see the Vector stores documentation.

First we need to load the data we want to index. For that we use WebBaseLoader, which requires BeautifulSoup to be installed.

In [16]:
! pip install beautifulsoup4
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (4.12.3)
Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4) (2.5)
In [17]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")

docs = loader.load()
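
Before indexing, it can help to peek at what was loaded. A small, illustrative check (the exact contents depend on the live page):

print(len(docs))                   # WebBaseLoader yields one Document per URL
print(docs[0].metadata["source"])  # the URL the content came from
print(docs[0].page_content[:200])  # first 200 characters of the page text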

Next we need to index the loaded documents into a vector store. This requires a couple of components: an embedding model and a vector store.

Using an OpenAI embedding model:

In [18]:
# Requires the langchain_openai package to be installed and the OPENAI_API_KEY environment variable to be set.
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
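
As a quick sanity check, the embedding model maps a string to a dense float vector; the dimensionality depends on which OpenAI embedding model is in use, so the number below is only indicative:

vec = embeddings.embed_query("how can langsmith help with testing?")
print(len(vec))   # e.g. 1536 for the default OpenAI embedding model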
In [19]:
# For simplicity we use FAISS, a local vector store.
# First, install the package.
! pip install faiss-gpu   # use faiss-cpu on machines without a GPU
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 85.5/85.5 MB 3.4 MB/s eta 0:00:00
Installing collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
In [20]:
# Next, build our index
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
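
Before wiring the index into a chain, it can be queried directly; a minimal check that relevant chunks come back:

# Ask the vector store for the 2 chunks most similar to the query
for doc in vector.similarity_search("how can langsmith help with testing?", k=2):
    print(doc.page_content[:100])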

Now that this data is indexed in a vector store, we can create a retrieval chain. This chain takes an incoming question, looks up relevant documents, passes those documents to the LLM along with the original question, and asks it to answer.

In [21]:
# First, set up the chain that takes a question and the retrieved documents and generates an answer.
from langchain.chains.combine_documents import create_stuff_documents_chain

prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:

    <context>
    {context}
    </context>

    Question: {input}
    """ )

document_chain = create_stuff_documents_chain(llm, prompt)
In [22]:
# If we wanted to, we could run this ourselves by passing in documents directly.
from langchain_core.documents import Document

document_chain.invoke({
    "input": "how can langsmith help with testing?",
    "context": [Document(page_content="langsmith can let you visualize test results")]
})
Out[22]:
'Langsmith can help with testing by allowing you to visualize test results.'

However, we want the documents to come from the retriever we just set up. That way, for a given question, the retriever can dynamically select the most relevant documents and pass those in.

In [23]:
from langchain.chains import create_retrieval_chain

retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)
In [24]:
# We can now invoke this chain. It returns a dict; the LLM's response is under the "answer" key.
response = retrieval_chain.invoke({
    "input": "how can langsmith :help with testing?"
    })

print(response["answer"])
LangSmith can help with testing by allowing developers to create datasets, run tests on LLM applications, compare different versions of applications, use a playground environment for rapid iteration and experimentation, collect feedback from users, annotate traces for evaluation, add runs to datasets for refinement, and monitor key metrics for performance analysis.

We have now set up a basic retrieval chain. For more information, see the Retrieval documentation.

Conversation Retrieval Chain

The chain we have built so far can only answer single questions. One of the main types of LLM application is a chat bot, so next we look at how to adapt the chain so that it can answer follow-up questions.

We can still use the create_retrieval_chain function, but two things need to change:

  • The retrieval step should take the whole conversation history into account, not just the most recent question.
  • The final LLM chain should likewise take the whole history into account.

Updating Retrieval

To update retrieval, we create a new chain. This chain takes the most recent question as input and the conversation history as chat_history, and uses the LLM to generate a search query.

In [25]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# First we need a prompt to pass to the LLM so that it generates the search query.

prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
])

retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
In [26]:
# Test this by passing in an instance where the user asks a follow-up question.
from langchain_core.messages import HumanMessage, AIMessage

chat_history = [
    HumanMessage(content="Can LangSmith help test my LLM applications?"),
    AIMessage(content="Yes!")
]
retriever_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})
Out[26]:
[Document(page_content='Skip to main content🦜️🛠️ LangSmith DocsLangChain Python DocsLangChain JS/TS DocsLangSmith API DocsSearchGo to AppLangSmithUser GuideSetupPricing (Coming Soon)Self-HostingTracingEvaluationMonitoringPrompt HubProxyUser GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.PrototypingPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\nThe ability to rapidly understand how the model is performing — and debug where it is failing — is incredibly important for this phase.DebuggingWhen developing new LLM applications, we suggest having LangSmith tracing enabled by default.\nOftentimes, it isn’t necessary to look at every single trace. However, when things go wrong (an unexpected end result, infinite agent loop, slower than expected execution, higher than expected token usage), it’s extremely helpful to debug by looking through the application traces. LangSmith gives clear visibility and debugging information at each step of an LLM sequence, making it much easier to identify and root-cause issues.\nWe provide native rendering of chat messages, functions, and retrieve documents.Initial Test SetWhile many developers still ship an initial version of their application based on “vibe checks”, we’ve seen an increasing number of engineering teams start to adopt a more test driven approach. LangSmith allows developers to create datasets, which are collections of inputs and reference outputs, and use these to run tests on their LLM applications.\nThese test cases can be uploaded in bulk, created on the fly, or exported from application traces. LangSmith also makes it easy to run custom evaluations (both LLM and heuristic based) to score test results.Comparison ViewWhen prototyping different versions of your applications and making changes, it’s important to see whether or not you’ve regressed with respect to your initial test cases.\nOftentimes, changes in the prompt, retrieval strategy, or model choice can have huge implications in responses produced by your application.\nIn order to get a sense for which variant is performing better, it’s useful to be able to view results for different configurations on the same datapoints side-by-side. We’ve invested heavily in a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.PlaygroundLangSmith provides a playground environment for rapid iteration and experimentation.\nThis allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),
 Document(page_content='LangSmith User Guide | 🦜️🛠️ LangSmith', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),
 Document(page_content="This allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.\nEvery playground run is logged in the system and can be used to create test cases or compare with other runs.Beta TestingBeta testing allows developers to collect more data on how their LLM applications are performing in real-world scenarios. In this phase, it’s important to develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it’s breaking down in those cases. Both feedback collection and run annotation are critical for this workflow. This will help in curation of test cases that can help track regressions/improvements and development of automatic evaluations.Collecting FeedbackWhen launching your application to an initial set of users, it’s important to gather human feedback on the responses it’s producing. This helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. LangSmith allows you to attach feedback scores to logged traces (oftentimes, this is hooked up to a feedback button in your app), then filter on traces that have a specific feedback tag and score. A common workflow is to filter on traces that receive a poor user feedback score, then drill down into problematic points using the detailed trace view.Annotating TracesLangSmith also supports sending runs to annotation queues, which allow annotators to closely inspect interesting traces and annotate them with respect to different criteria. Annotators can be PMs, engineers, or even subject matter experts. This allows users to catch regressions across important evaluation criteria.Adding Runs to a DatasetAs your application progresses through the beta testing phase, it's essential to continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios. This is a key benefit in having your logging system and your evaluation/testing system in the same platform.ProductionClosely inspecting key data points, growing benchmarking datasets, annotating traces, and drilling down into important data in trace view are workflows you’ll also want to do once your app hits production. However, especially at the production stage, it’s crucial to get a high-level overview of application performance with respect to latency, cost, and feedback scores. This ensures that it's delivering desirable results at scale.Monitoring and A/B TestingLangSmith provides monitoring charts that allow you to track key metrics over time. You can expand to view metrics for a given period and drill down into a specific data point to get a trace table for that time period — this is especially handy for debugging production issues.LangSmith also allows for tag and metadata grouping, which allows users to mark different versions of their applications with different identifiers and view how they are performing side-by-side within each chart. This is helpful for A/B testing changes in prompt, model, or retrieval strategy.Was this page helpful?PreviousLangSmithNextSetupPrototypingBeta TestingProductionCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogCopyright © 2024 LangChain, Inc.", metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'})]

We can see that this returns documents about testing in LangSmith. This is because the LLM generated a new search query by combining the follow-up question with the conversation history.

Now that we have this new retriever, we can create a new chain to continue the conversation with these retrieved documents in mind.

In [29]:
prompt = ChatPromptTemplate.from_messages([
    ("system", """
    Answer the user's questions based on the below context:

    {context}
    """),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])

document_chain = create_stuff_documents_chain(llm, prompt)

retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)
In [30]:
# Test this end-to-end
chat_history = [
    HumanMessage(content="Can LangSmith help test my LLM applications?"),
    AIMessage(content="Yes!")
]

retrieval_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})
Out[30]:
{'chat_history': [HumanMessage(content='Can LangSmith help test my LLM applications?'),
  AIMessage(content='Yes!')],
 'input': 'Tell me how',
 'context': [Document(page_content='Skip to main content🦜️🛠️ LangSmith DocsLangChain Python DocsLangChain JS/TS DocsLangSmith API DocsSearchGo to AppLangSmithUser GuideSetupPricing (Coming Soon)Self-HostingTracingEvaluationMonitoringPrompt HubProxyUser GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.PrototypingPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\nThe ability to rapidly understand how the model is performing — and debug where it is failing — is incredibly important for this phase.DebuggingWhen developing new LLM applications, we suggest having LangSmith tracing enabled by default.\nOftentimes, it isn’t necessary to look at every single trace. However, when things go wrong (an unexpected end result, infinite agent loop, slower than expected execution, higher than expected token usage), it’s extremely helpful to debug by looking through the application traces. LangSmith gives clear visibility and debugging information at each step of an LLM sequence, making it much easier to identify and root-cause issues.\nWe provide native rendering of chat messages, functions, and retrieve documents.Initial Test SetWhile many developers still ship an initial version of their application based on “vibe checks”, we’ve seen an increasing number of engineering teams start to adopt a more test driven approach. LangSmith allows developers to create datasets, which are collections of inputs and reference outputs, and use these to run tests on their LLM applications.\nThese test cases can be uploaded in bulk, created on the fly, or exported from application traces. LangSmith also makes it easy to run custom evaluations (both LLM and heuristic based) to score test results.Comparison ViewWhen prototyping different versions of your applications and making changes, it’s important to see whether or not you’ve regressed with respect to your initial test cases.\nOftentimes, changes in the prompt, retrieval strategy, or model choice can have huge implications in responses produced by your application.\nIn order to get a sense for which variant is performing better, it’s useful to be able to view results for different configurations on the same datapoints side-by-side. We’ve invested heavily in a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.PlaygroundLangSmith provides a playground environment for rapid iteration and experimentation.\nThis allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),
  Document(page_content='LangSmith User Guide | 🦜️🛠️ LangSmith', metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'}),
  Document(page_content="This allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.\nEvery playground run is logged in the system and can be used to create test cases or compare with other runs.Beta TestingBeta testing allows developers to collect more data on how their LLM applications are performing in real-world scenarios. In this phase, it’s important to develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it’s breaking down in those cases. Both feedback collection and run annotation are critical for this workflow. This will help in curation of test cases that can help track regressions/improvements and development of automatic evaluations.Collecting FeedbackWhen launching your application to an initial set of users, it’s important to gather human feedback on the responses it’s producing. This helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. LangSmith allows you to attach feedback scores to logged traces (oftentimes, this is hooked up to a feedback button in your app), then filter on traces that have a specific feedback tag and score. A common workflow is to filter on traces that receive a poor user feedback score, then drill down into problematic points using the detailed trace view.Annotating TracesLangSmith also supports sending runs to annotation queues, which allow annotators to closely inspect interesting traces and annotate them with respect to different criteria. Annotators can be PMs, engineers, or even subject matter experts. This allows users to catch regressions across important evaluation criteria.Adding Runs to a DatasetAs your application progresses through the beta testing phase, it's essential to continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios. This is a key benefit in having your logging system and your evaluation/testing system in the same platform.ProductionClosely inspecting key data points, growing benchmarking datasets, annotating traces, and drilling down into important data in trace view are workflows you’ll also want to do once your app hits production. However, especially at the production stage, it’s crucial to get a high-level overview of application performance with respect to latency, cost, and feedback scores. This ensures that it's delivering desirable results at scale.Monitoring and A/B TestingLangSmith provides monitoring charts that allow you to track key metrics over time. You can expand to view metrics for a given period and drill down into a specific data point to get a trace table for that time period — this is especially handy for debugging production issues.LangSmith also allows for tag and metadata grouping, which allows users to mark different versions of their applications with different identifiers and view how they are performing side-by-side within each chart. This is helpful for A/B testing changes in prompt, model, or retrieval strategy.Was this page helpful?PreviousLangSmithNextSetupPrototypingBeta TestingProductionCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogCopyright © 2024 LangChain, Inc.", metadata={'source': 'https://docs.smith.langchain.com/user_guide', 'title': 'LangSmith User Guide | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.', 'language': 'en'})],
 'answer': 'LangSmith can help test your LLM applications by providing features such as tracing, test case creation, custom evaluations, comparison view, playground environment for rapid iteration and experimentation, beta testing support, feedback collection, annotation queues, adding runs to datasets, monitoring charts for tracking key metrics, and A/B testing capabilities. These features allow you to assess and improve the performance of your LLM applications at various stages of development and production.'}

We can see this produces a coherent answer: we have successfully turned our retrieval chain into a chat bot.
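
In a real chat bot, the history has to be carried across turns. A minimal sketch of such a loop, assuming that appending each exchange to chat_history is the only state we need to keep:

# Minimal multi-turn loop: feed each answer back into the history
chat_history = []
for question in ["Can LangSmith help test my LLM applications?", "Tell me how"]:
    response = retrieval_chain.invoke({
        "chat_history": chat_history,
        "input": question,
    })
    chat_history.append(HumanMessage(content=question))
    chat_history.append(AIMessage(content=response["answer"]))
    print(response["answer"])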

Agent

So far we have built chain examples where each step is known ahead of time. The last thing we will build is an agent, in which the LLM decides which steps to take.

[Note] Local models are not yet reliable enough for this, so we only show how to build an agent using OpenAI models.

The first thing to do when building an agent is to decide which tools it should have access to. In this example, we give the agent access to two tools:

  1. The retriever we just created, which lets it easily answer questions about LangSmith.
  2. A search tool, which lets it easily answer questions that require up-to-date information.

First, set up a tool for the retriever we just created:

In [31]:
from langchain.tools.retriever import create_retriever_tool

retriever_tool = create_retriever_tool(
    retriever,
    "langsmith_search",
    """\
    Search for information about LangSmith. \
    For any questions about LangSmith, \
    you must use this tool!\
    """,
)

The search tool we will use is Tavily: https://tavily.com/

Tavily's API is free for limited use; you can sign up and obtain an API key without entering a credit card.

In [32]:
# The search tool is Tavily. Assumes a 'Tavily' entry in MyAccount (see account.py above).
import os
os.environ["TAVILY_API_KEY"] = MyAccount["Tavily"]["api-key"]
In [33]:
from langchain_community.tools.tavily_search import TavilySearchResults

search = TavilySearchResults()
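
The tool can also be tried on its own before handing it to an agent; a quick, illustrative check (live results will vary):

# Run the search tool directly; it returns a list of result snippets
results = search.run("What is LangSmith?")
print(results[0])   # e.g. a dict with 'url' and 'content' keys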
In [34]:
tools = [retriever_tool, search]

Now that we have the tools, we can create an agent that uses them.

In [35]:
! pip install langchainhub
Collecting langchainhub
  Downloading langchainhub-0.1.15-py3-none-any.whl (4.6 kB)
Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchainhub) (2.31.0)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.31.0.20240311-py3-none-any.whl (14 kB)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (2024.2.2)
Installing collected packages: types-requests, langchainhub
Successfully installed langchainhub-0.1.15 types-requests-2.31.0.20240311
In [36]:
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain.agents import create_openai_functions_agent
from langchain.agents import AgentExecutor

prompt = hub.pull("hwchase17/openai-functions-agent")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
In [37]:
agent_executor.invoke({"input":"how can langsmith help with testing?"})

> Entering new AgentExecutor chain...

Invoking: `langsmith_search` with `{'query': 'how can LangSmith help with testing'}`


Skip to main content🦜️🛠️ LangSmith DocsLangChain Python DocsLangChain JS/TS DocsLangSmith API DocsSearchGo to AppLangSmithUser GuideSetupPricing (Coming Soon)Self-HostingTracingEvaluationMonitoringPrompt HubProxyUser GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping​Prototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.
The ability to rapidly understand how the model is performing — and debug where it is failing — is incredibly important for this phase.Debugging​When developing new LLM applications, we suggest having LangSmith tracing enabled by default.
Oftentimes, it isn’t necessary to look at every single trace. However, when things go wrong (an unexpected end result, infinite agent loop, slower than expected execution, higher than expected token usage), it’s extremely helpful to debug by looking through the application traces. LangSmith gives clear visibility and debugging information at each step of an LLM sequence, making it much easier to identify and root-cause issues.
We provide native rendering of chat messages, functions, and retrieve documents.Initial Test Set​While many developers still ship an initial version of their application based on “vibe checks”, we’ve seen an increasing number of engineering teams start to adopt a more test driven approach. LangSmith allows developers to create datasets, which are collections of inputs and reference outputs, and use these to run tests on their LLM applications.
These test cases can be uploaded in bulk, created on the fly, or exported from application traces. LangSmith also makes it easy to run custom evaluations (both LLM and heuristic based) to score test results.Comparison View​When prototyping different versions of your applications and making changes, it’s important to see whether or not you’ve regressed with respect to your initial test cases.
Oftentimes, changes in the prompt, retrieval strategy, or model choice can have huge implications in responses produced by your application.
In order to get a sense for which variant is performing better, it’s useful to be able to view results for different configurations on the same datapoints side-by-side. We’ve invested heavily in a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.Playground​LangSmith provides a playground environment for rapid iteration and experimentation.
This allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.

This allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.
Every playground run is logged in the system and can be used to create test cases or compare with other runs.Beta Testing​Beta testing allows developers to collect more data on how their LLM applications are performing in real-world scenarios. In this phase, it’s important to develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it’s breaking down in those cases. Both feedback collection and run annotation are critical for this workflow. This will help in curation of test cases that can help track regressions/improvements and development of automatic evaluations.Collecting Feedback​When launching your application to an initial set of users, it’s important to gather human feedback on the responses it’s producing. This helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. LangSmith allows you to attach feedback scores to logged traces (oftentimes, this is hooked up to a feedback button in your app), then filter on traces that have a specific feedback tag and score. A common workflow is to filter on traces that receive a poor user feedback score, then drill down into problematic points using the detailed trace view.Annotating Traces​LangSmith also supports sending runs to annotation queues, which allow annotators to closely inspect interesting traces and annotate them with respect to different criteria. Annotators can be PMs, engineers, or even subject matter experts. This allows users to catch regressions across important evaluation criteria.Adding Runs to a Dataset​As your application progresses through the beta testing phase, it's essential to continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios. This is a key benefit in having your logging system and your evaluation/testing system in the same platform.Production​Closely inspecting key data points, growing benchmarking datasets, annotating traces, and drilling down into important data in trace view are workflows you’ll also want to do once your app hits production. However, especially at the production stage, it’s crucial to get a high-level overview of application performance with respect to latency, cost, and feedback scores. This ensures that it's delivering desirable results at scale.Monitoring and A/B Testing​LangSmith provides monitoring charts that allow you to track key metrics over time. You can expand to view metrics for a given period and drill down into a specific data point to get a trace table for that time period — this is especially handy for debugging production issues.LangSmith also allows for tag and metadata grouping, which allows users to mark different versions of their applications with different identifiers and view how they are performing side-by-side within each chart. This is helpful for A/B testing changes in prompt, model, or retrieval strategy.Was this page helpful?PreviousLangSmithNextSetupPrototypingBeta TestingProductionCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogCopyright © 2024 LangChain, Inc.

LangSmith User Guide | 🦜️🛠️ LangSmith

LangSmith can help with testing in various ways throughout the application development lifecycle. Here are some key ways LangSmith supports testing:

1. Prototyping: LangSmith allows for quick experimentation between prompts, model types, retrieval strategies, and other parameters to understand how the model is performing and debug any issues.

2. Debugging: LangSmith provides clear visibility and debugging information at each step of an LLM sequence, making it easier to identify and root-cause issues when things go wrong.

3. Initial Test Set: Developers can create datasets of inputs and reference outputs to run tests on their LLM applications. LangSmith makes it easy to run custom evaluations to score test results.

4. Comparison View: LangSmith offers a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of an application.

5. Playground: LangSmith provides a playground environment for rapid iteration and experimentation with different prompts and models, allowing users to create test cases and compare different runs.

6. Beta Testing: Developers can collect more data on how their LLM applications perform in real-world scenarios during beta testing. LangSmith supports feedback collection and run annotation to track regressions and improvements.

7. Collecting Feedback: LangSmith allows developers to gather human feedback on the responses produced by their applications, helping to highlight edge cases and problematic responses.

8. Annotating Traces: Users can send runs to annotation queues in LangSmith to closely inspect interesting traces and annotate them with different criteria, catching regressions across important evaluation criteria.

9. Production: In the production stage, LangSmith enables users to closely inspect key data points, grow benchmarking datasets, and monitor application performance with respect to latency, cost, and feedback scores.

10. Monitoring and A/B Testing: LangSmith provides monitoring charts to track key metrics over time and allows for A/B testing changes in prompt, model, or retrieval strategy.

Overall, LangSmith offers a comprehensive platform for LLM application development, monitoring, and testing, supporting developers at every stage of the application lifecycle.

> Finished chain.
Out[37]:
{'input': 'how can langsmith help with testing?',
 'output': 'LangSmith can help with testing in various ways throughout the application development lifecycle. Here are some key ways LangSmith supports testing:\n\n1. Prototyping: LangSmith allows for quick experimentation between prompts, model types, retrieval strategies, and other parameters to understand how the model is performing and debug any issues.\n\n2. Debugging: LangSmith provides clear visibility and debugging information at each step of an LLM sequence, making it easier to identify and root-cause issues when things go wrong.\n\n3. Initial Test Set: Developers can create datasets of inputs and reference outputs to run tests on their LLM applications. LangSmith makes it easy to run custom evaluations to score test results.\n\n4. Comparison View: LangSmith offers a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of an application.\n\n5. Playground: LangSmith provides a playground environment for rapid iteration and experimentation with different prompts and models, allowing users to create test cases and compare different runs.\n\n6. Beta Testing: Developers can collect more data on how their LLM applications perform in real-world scenarios during beta testing. LangSmith supports feedback collection and run annotation to track regressions and improvements.\n\n7. Collecting Feedback: LangSmith allows developers to gather human feedback on the responses produced by their applications, helping to highlight edge cases and problematic responses.\n\n8. Annotating Traces: Users can send runs to annotation queues in LangSmith to closely inspect interesting traces and annotate them with different criteria, catching regressions across important evaluation criteria.\n\n9. Production: In the production stage, LangSmith enables users to closely inspect key data points, grow benchmarking datasets, and monitor application performance with respect to latency, cost, and feedback scores.\n\n10. Monitoring and A/B Testing: LangSmith provides monitoring charts to track key metrics over time and allows for A/B testing changes in prompt, model, or retrieval strategy.\n\nOverall, LangSmith offers a comprehensive platform for LLM application development, monitoring, and testing, supporting developers at every stage of the application lifecycle.'}
In [38]:
agent_executor.invoke({"input":"what is the weather in SF?"})

> Entering new AgentExecutor chain...

Invoking: `tavily_search_results_json` with `{'query': 'weather in San Francisco'}`


[{'url': 'https://www.accuweather.com/en/us/san-francisco/94103/hourly-weather-forecast/347629', 'content': 'Get the latest hourly weather conditions and outlook for San Francisco, CA, including temperature, precipitation, wind, air quality and more. See the cloud cover, visibility and cloud ceiling for each hour until 11 PM.'}, {'url': 'https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US', 'content': "Comfy & Cozy\nThat's Not What Was Expected\nOutside\n'No-Name Storms' In Florida\nGifts From On High\nWhat To Do For Wheezing\nSurviving The Season\nStay Safe\nAir Quality Index\nAir quality is considered satisfactory, and air pollution poses little or no risk.\n Health & Activities\nSeasonal Allergies and Pollen Count Forecast\nNo pollen detected in your area\nCold & Flu Forecast\nFlu risk is low in your area\nWe recognize our responsibility to use data and technology for good. recents\nSpecialty Forecasts\n10 Day Weather-San Francisco, CA\nToday\nMon 18 | Day\nConsiderable cloudiness. Tue 19\nTue 19 | Day\nLight rain early...then remaining cloudy with showers in the afternoon. Wed 27\nWed 27 | Day\nOvercast with rain showers at times."}, {'url': 'https://www.accuweather.com/en/us/san-francisco/94103/current-weather/347629', 'content': 'Get the latest weather conditions and outlook for San Francisco, CA, including temperature, humidity, wind, pressure, and cloud cover. See alerts, sunrise and sunset times, and historical data for the city.'}, {'url': 'https://www.accuweather.com/en/us/san-francisco/94103/weather-forecast/347629', 'content': 'Get the current and future weather conditions for San Francisco, CA, including temperature, precipitation, wind, air quality and more. See the hourly and 10-day outlook, radar maps, alerts and allergy information.'}, {'url': 'https://www.wunderground.com/forecast/us/ca/san-francisco', 'content': 'Get the latest weather information for San Francisco, CA, including temperature, precipitation, wind speed, and humidity. See the hourly and 10-day forecast for the South of Market station and other nearby weather stations.'}]I found some sources where you can check the weather in San Francisco:

1. [AccuWeather - Hourly Weather Forecast](https://www.accuweather.com/en/us/san-francisco/94103/hourly-weather-forecast/347629): Get the latest hourly weather conditions and outlook for San Francisco, CA, including temperature, precipitation, wind, air quality, and more.

2. [The Weather Channel - 10 Day Weather Forecast](https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US): Check the 10-day weather forecast for San Francisco, CA, including details on temperature, precipitation, and more.

3. [AccuWeather - Current Weather Conditions](https://www.accuweather.com/en/us/san-francisco/94103/current-weather/347629): Get the latest weather conditions and outlook for San Francisco, CA, including temperature, humidity, wind, pressure, and cloud cover.

4. [AccuWeather - Weather Forecast](https://www.accuweather.com/en/us/san-francisco/94103/weather-forecast/347629): Check the current and future weather conditions for San Francisco, CA, including temperature, precipitation, wind, air quality, and more.

5. [Weather Underground - Forecast](https://www.wunderground.com/forecast/us/ca/san-francisco): Get the latest weather information for San Francisco, CA, including temperature, precipitation, wind speed, and humidity. See the hourly and 10-day forecast for the South of Market station and other nearby weather stations.

> Finished chain.
Out[38]:
{'input': 'what is the weather in SF?',
 'output': 'I found some sources where you can check the weather in San Francisco:\n\n1. [AccuWeather - Hourly Weather Forecast](https://www.accuweather.com/en/us/san-francisco/94103/hourly-weather-forecast/347629): Get the latest hourly weather conditions and outlook for San Francisco, CA, including temperature, precipitation, wind, air quality, and more.\n\n2. [The Weather Channel - 10 Day Weather Forecast](https://weather.com/weather/tenday/l/San Francisco CA USCA0987:1:US): Check the 10-day weather forecast for San Francisco, CA, including details on temperature, precipitation, and more.\n\n3. [AccuWeather - Current Weather Conditions](https://www.accuweather.com/en/us/san-francisco/94103/current-weather/347629): Get the latest weather conditions and outlook for San Francisco, CA, including temperature, humidity, wind, pressure, and cloud cover.\n\n4. [AccuWeather - Weather Forecast](https://www.accuweather.com/en/us/san-francisco/94103/weather-forecast/347629): Check the current and future weather conditions for San Francisco, CA, including temperature, precipitation, wind, air quality, and more.\n\n5. [Weather Underground - Forecast](https://www.wunderground.com/forecast/us/ca/san-francisco): Get the latest weather information for San Francisco, CA, including temperature, precipitation, wind speed, and humidity. See the hourly and 10-day forecast for the South of Market station and other nearby weather stations.'}
In [39]:
from langchain_core.messages import HumanMessage, AIMessage  # needed if not already imported earlier

chat_history = [
    HumanMessage(content="Can LangSmith help test my LLM applications?"),
    AIMessage(content="Yes!")
]
# Pass the prior conversation so the agent can resolve what "Tell me how" refers to
agent_executor.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})

> Entering new AgentExecutor chain...

Invoking: `langsmith_search` with `{'query': 'LangSmith LLM application testing services'}`


Skip to main content🦜️🛠️ LangSmith DocsLangChain Python DocsLangChain JS/TS DocsLangSmith API DocsSearchGo to AppLangSmithUser GuideSetupPricing (Coming Soon)Self-HostingTracingEvaluationMonitoringPrompt HubProxyUser GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping​Prototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.
The ability to rapidly understand how the model is performing — and debug where it is failing — is incredibly important for this phase.Debugging​When developing new LLM applications, we suggest having LangSmith tracing enabled by default.
Oftentimes, it isn’t necessary to look at every single trace. However, when things go wrong (an unexpected end result, infinite agent loop, slower than expected execution, higher than expected token usage), it’s extremely helpful to debug by looking through the application traces. LangSmith gives clear visibility and debugging information at each step of an LLM sequence, making it much easier to identify and root-cause issues.
We provide native rendering of chat messages, functions, and retrieve documents.Initial Test Set​While many developers still ship an initial version of their application based on “vibe checks”, we’ve seen an increasing number of engineering teams start to adopt a more test driven approach. LangSmith allows developers to create datasets, which are collections of inputs and reference outputs, and use these to run tests on their LLM applications.
These test cases can be uploaded in bulk, created on the fly, or exported from application traces. LangSmith also makes it easy to run custom evaluations (both LLM and heuristic based) to score test results.Comparison View​When prototyping different versions of your applications and making changes, it’s important to see whether or not you’ve regressed with respect to your initial test cases.
Oftentimes, changes in the prompt, retrieval strategy, or model choice can have huge implications in responses produced by your application.
In order to get a sense for which variant is performing better, it’s useful to be able to view results for different configurations on the same datapoints side-by-side. We’ve invested heavily in a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.Playground​LangSmith provides a playground environment for rapid iteration and experimentation.
This allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.

LangSmith User Guide | 🦜️🛠️ LangSmith

This allows you to quickly test out different prompts and models. You can open the playground from any prompt or model run in your trace.
Every playground run is logged in the system and can be used to create test cases or compare with other runs.Beta Testing​Beta testing allows developers to collect more data on how their LLM applications are performing in real-world scenarios. In this phase, it’s important to develop an understanding for the types of inputs the app is performing well or poorly on and how exactly it’s breaking down in those cases. Both feedback collection and run annotation are critical for this workflow. This will help in curation of test cases that can help track regressions/improvements and development of automatic evaluations.Collecting Feedback​When launching your application to an initial set of users, it’s important to gather human feedback on the responses it’s producing. This helps draw attention to the most interesting runs and highlight edge cases that are causing problematic responses. LangSmith allows you to attach feedback scores to logged traces (oftentimes, this is hooked up to a feedback button in your app), then filter on traces that have a specific feedback tag and score. A common workflow is to filter on traces that receive a poor user feedback score, then drill down into problematic points using the detailed trace view.Annotating Traces​LangSmith also supports sending runs to annotation queues, which allow annotators to closely inspect interesting traces and annotate them with respect to different criteria. Annotators can be PMs, engineers, or even subject matter experts. This allows users to catch regressions across important evaluation criteria.Adding Runs to a Dataset​As your application progresses through the beta testing phase, it's essential to continue collecting data to refine and improve its performance. LangSmith enables you to add runs as examples to datasets (from both the project page and within an annotation queue), expanding your test coverage on real-world scenarios. This is a key benefit in having your logging system and your evaluation/testing system in the same platform.Production​Closely inspecting key data points, growing benchmarking datasets, annotating traces, and drilling down into important data in trace view are workflows you’ll also want to do once your app hits production. However, especially at the production stage, it’s crucial to get a high-level overview of application performance with respect to latency, cost, and feedback scores. This ensures that it's delivering desirable results at scale.Monitoring and A/B Testing​LangSmith provides monitoring charts that allow you to track key metrics over time. You can expand to view metrics for a given period and drill down into a specific data point to get a trace table for that time period — this is especially handy for debugging production issues.LangSmith also allows for tag and metadata grouping, which allows users to mark different versions of their applications with different identifiers and view how they are performing side-by-side within each chart. This is helpful for A/B testing changes in prompt, model, or retrieval strategy.Was this page helpful?PreviousLangSmithNextSetupPrototypingBeta TestingProductionCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogCopyright © 2024 LangChain, Inc.LangSmith is a platform for LLM application development, monitoring, and testing. Here are some ways LangSmith can help test your LLM applications:

1. Prototyping: LangSmith supports quick experimentation between prompts, model types, retrieval strategies, and other parameters for prototyping LLM applications.

2. Debugging: LangSmith provides tracing capabilities to help debug issues in LLM applications by giving clear visibility and debugging information at each step of an LLM sequence.

3. Initial Test Set: Developers can create datasets of inputs and reference outputs to run tests on their LLM applications. LangSmith makes it easy to run custom evaluations to score test results.

4. Comparison View: LangSmith offers a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.

5. Playground: LangSmith provides a playground environment for rapid iteration and experimentation with different prompts and models.

6. Beta Testing: Developers can collect more data on how their LLM applications are performing in real-world scenarios during beta testing phase.

7. Collecting Feedback: LangSmith allows developers to gather human feedback on the responses produced by their applications to improve performance.

8. Annotating Traces: LangSmith supports sending runs to annotation queues for annotators to inspect and annotate interesting traces with respect to different criteria.

9. Monitoring and A/B Testing: LangSmith provides monitoring charts to track key metrics over time and allows for A/B testing changes in prompt, model, or retrieval strategy.

These are some of the ways LangSmith can assist in testing your LLM applications.

> Finished chain.
Out[39]:
{'chat_history': [HumanMessage(content='Can LangSmith help test my LLM applications?'),
  AIMessage(content='Yes!')],
 'input': 'Tell me how',
 'output': 'LangSmith is a platform for LLM application development, monitoring, and testing. Here are some ways LangSmith can help test your LLM applications:\n\n1. Prototyping: LangSmith supports quick experimentation between prompts, model types, retrieval strategies, and other parameters for prototyping LLM applications.\n\n2. Debugging: LangSmith provides tracing capabilities to help debug issues in LLM applications by giving clear visibility and debugging information at each step of an LLM sequence.\n\n3. Initial Test Set: Developers can create datasets of inputs and reference outputs to run tests on their LLM applications. LangSmith makes it easy to run custom evaluations to score test results.\n\n4. Comparison View: LangSmith offers a user-friendly comparison view for test runs to track and diagnose regressions in test scores across multiple revisions of your application.\n\n5. Playground: LangSmith provides a playground environment for rapid iteration and experimentation with different prompts and models.\n\n6. Beta Testing: Developers can collect more data on how their LLM applications are performing in real-world scenarios during beta testing phase.\n\n7. Collecting Feedback: LangSmith allows developers to gather human feedback on the responses produced by their applications to improve performance.\n\n8. Annotating Traces: LangSmith supports sending runs to annotation queues for annotators to inspect and annotate interesting traces with respect to different criteria.\n\n9. Monitoring and A/B Testing: LangSmith provides monitoring charts to track key metrics over time and allows for A/B testing changes in prompt, model, or retrieval strategy.\n\nThese are some of the ways LangSmith can assist in testing your LLM applications.'}

We have now set up a basic agent. For more detailed information, see the Agents documentation.

Serving with LangServe

Now that we have built an application, we need to make it available to others. This is where LangServe comes in: LangServe helps deploy a LangChain chain as a REST API.

Up to this point, this document has been meant to run in Jupyter; from here on, we create a Python file and run it interactively from the command line (a sketch of the server file is given below).

In [40]:
! pip install "langserve[all]"
Collecting langserve[all]
  Downloading langserve-0.0.47-py3-none-any.whl (937 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 938.0/938.0 kB 5.2 MB/s eta 0:00:00
Requirement already satisfied: httpx>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from langserve[all]) (0.27.0)
Requirement already satisfied: langchain>=0.0.333 in /usr/local/lib/python3.10/dist-packages (from langserve[all]) (0.1.11)
Requirement already satisfied: orjson>=2 in /usr/local/lib/python3.10/dist-packages (from langserve[all]) (3.9.15)
Requirement already satisfied: pydantic>=1 in /usr/local/lib/python3.10/dist-packages (from langserve[all]) (2.6.3)
Collecting fastapi<1,>=0.90.1 (from langserve[all])
  Downloading fastapi-0.110.0-py3-none-any.whl (92 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 92.1/92.1 kB 11.3 MB/s eta 0:00:00
Collecting httpx-sse>=0.3.1 (from langserve[all])
  Downloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)
Collecting sse-starlette<2.0.0,>=1.3.0 (from langserve[all])
  Downloading sse_starlette-1.8.2-py3-none-any.whl (8.9 kB)
Collecting starlette<0.37.0,>=0.36.3 (from fastapi<1,>=0.90.1->langserve[all])
  Downloading starlette-0.36.3-py3-none-any.whl (71 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 71.5/71.5 kB 11.1 MB/s eta 0:00:00
Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from fastapi<1,>=0.90.1->langserve[all]) (4.10.0)
Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->langserve[all]) (3.7.1)
Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->langserve[all]) (2024.2.2)
Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->langserve[all]) (1.0.4)
Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->langserve[all]) (3.6)
Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.23.0->langserve[all]) (1.3.1)
Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx>=0.23.0->langserve[all]) (0.14.0)
Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (6.0.1)
Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (2.0.28)
Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (3.9.3)
Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (4.0.3)
Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (0.6.4)
Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (1.33)
Requirement already satisfied: langchain-community<0.1,>=0.0.25 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (0.0.27)
Requirement already satisfied: langchain-core<0.2,>=0.1.29 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (0.1.30)
Requirement already satisfied: langchain-text-splitters<0.1,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (0.0.1)
Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (0.1.23)
Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (1.25.2)
Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (2.31.0)
Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain>=0.0.333->langserve[all]) (8.2.3)
Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1->langserve[all]) (0.6.0)
Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1->langserve[all]) (2.16.3)
Collecting uvicorn (from sse-starlette<2.0.0,>=1.3.0->langserve[all])
  Downloading uvicorn-0.28.0-py3-none-any.whl (60 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 60.6/60.6 kB 8.8 MB/s eta 0:00:00
Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.333->langserve[all]) (1.3.1)
Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.333->langserve[all]) (23.2.0)
Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.333->langserve[all]) (1.4.1)
Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.333->langserve[all]) (6.0.5)
Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.333->langserve[all]) (1.9.4)
Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain>=0.0.333->langserve[all]) (3.21.1)
Requirement already satisfied: typing-inspect<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain>=0.0.333->langserve[all]) (0.9.0)
Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain>=0.0.333->langserve[all]) (2.4)
Requirement already satisfied: packaging<24.0,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.2,>=0.1.29->langchain>=0.0.333->langserve[all]) (23.2)
Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx>=0.23.0->langserve[all]) (1.2.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain>=0.0.333->langserve[all]) (3.3.2)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain>=0.0.333->langserve[all]) (2.0.7)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain>=0.0.333->langserve[all]) (3.0.3)
Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn->sse-starlette<2.0.0,>=1.3.0->langserve[all]) (8.1.7)
Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain>=0.0.333->langserve[all]) (1.0.0)
Installing collected packages: uvicorn, httpx-sse, starlette, fastapi, sse-starlette, langserve
Successfully installed fastapi-0.110.0 httpx-sse-0.4.0 langserve-0.0.47 sse-starlette-1.8.2 starlette-0.36.3 uvicorn-0.28.0

Server

In [41]:
!python {SAVE_PATH}/langserve_serv.py
python3: can't open file '/content/drive/MyDrive/DeepLearning/llm_book2/langserve_serv.py': [Errno 2] No such file or directory
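The error above occurs because the script has not been created yet. What follows is a minimal sketch of what langserve_serv.py could contain, modeled on the LangServe quickstart; the route path /chain and the served runnable are assumptions, and the agent_executor built above could be mounted the same way.

#!/usr/bin/env python
# langserve_serv.py -- minimal sketch of a LangServe server (assumed content)
from fastapi import FastAPI
from langchain_openai import ChatOpenAI
from langserve import add_routes

app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="A simple API server using LangChain's Runnable interfaces",
)

# Expose a runnable as a REST endpoint; any LangChain Runnable
# (e.g. the agent_executor built above) can be mounted with add_routes.
add_routes(app, ChatOpenAI(model="gpt-3.5-turbo"), path="/chain")

if __name__ == "__main__":
    import uvicorn
    # Serve on port 8000; LangServe also exposes a playground UI at /chain/playground
    uvicorn.run(app, host="0.0.0.0", port=8000)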

Client

In [41]:
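The client cell above was left empty; a minimal sketch of a LangServe client, assuming the server sketched above is running locally and exposes the /chain route:

from langserve import RemoteRunnable

# RemoteRunnable gives the remote endpoint the same interface as a local chain
remote_chain = RemoteRunnable("http://localhost:8000/chain/")
print(remote_chain.invoke("how can langsmith help with testing?"))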