messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
ai_msg = model.invoke(messages)
ai_msg
from langchain_anthropic import ChatAnthropic
from typing_extensions import Annotated

model = ChatAnthropic(model="claude-haiku-4-5-20251001")


def get_weather(
    location: Annotated[str, ..., "Location as city and state."]
) -> str:
    """Get the weather at a location."""
    return "It's sunny."


model_with_tools = model.bind_tools([get_weather])
response = model_with_tools.invoke("Which city is hotter today: LA or NY?")
response.content
[{'text': "I'll help you compare the temperatures of Los Angeles and New York by checking their current weather. I'll retrieve the weather for both cities.", 'type': 'text'}, {'id': 'toolu_01CkMaXrgmsNjTso7so94RJq', 'input': {'location': 'Los Angeles, CA'}, 'name': 'get_weather', 'type': 'tool_use'}, {'id': 'toolu_01SKaTBk9wHjsBTw5mrPVSQf', 'input': {'location': 'New York, NY'}, 'name': 'get_weather', 'type': 'tool_use'}]
Use content_blocks to access the content in a standard format that is consistent across providers:
response.content_blocks
[{'type': 'text', 'text': "I'll help you compare the temperatures of Los Angeles and New York by checking their current weather. I'll retrieve the weather for both cities."}, {'type': 'tool_call', 'name': 'get_weather', 'args': {'location': 'Los Angeles, CA'}, 'id': 'toolu_01CkMaXrgmsNjTso7so94RJq'}, {'type': 'tool_call', 'name': 'get_weather', 'args': {'location': 'New York, NY'}, 'id': 'toolu_01SKaTBk9wHjsBTw5mrPVSQf'}]
import json

from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(
    model="claude-sonnet-4-5-20250929",
    max_tokens=5000,
    thinking={"type": "enabled", "budget_tokens": 2000},
)

response = model.invoke("What is the cube root of 50.653?")
print(json.dumps(response.content_blocks, indent=2))
[ { "type": "reasoning", "reasoning": "To find the cube root of 50.653, I need to find the value of $x$ such that $x^3 = 50.653$.\n\nI can try to estimate this first. \n$3^3 = 27$\n$4^3 = 64$\n\nSo the cube root of 50.653 will be somewhere between 3 and 4, but closer to 4.\n\nLet me try to compute this more precisely. I can use the cube root function:\n\ncube root of 50.653 = 50.653^(1/3)\n\nLet me calculate this:\n50.653^(1/3) \u2248 3.6998\n\nLet me verify:\n3.6998^3 \u2248 50.6533\n\nThat's very close to 50.653, so I'm confident that the cube root of 50.653 is approximately 3.6998.\n\nActually, let me compute this more precisely:\n50.653^(1/3) \u2248 3.69981\n\nLet me verify once more:\n3.69981^3 \u2248 50.652998\n\nThat's extremely close to 50.653, so I'll say that the cube root of 50.653 is approximately 3.69981.", "extras": {"signature": "ErUBCkYIBxgCIkB0UjV..."} }, { "text": "The cube root of 50.653 is approximately 3.6998.\n\nTo verify: 3.6998\u00b3 = 50.6530, which is very close to our original number.", "type": "text" }]
First invocation:
{'cache_read': 0, 'cache_creation': 1458}

Second:
{'cache_read': 1458, 'cache_creation': 0}
Extended caching

The cache lifetime is 5 minutes by default. If this is too short, you can apply one-hour caching by enabling the "extended-cache-ttl-2025-04-11" beta header:
model = ChatAnthropic(
    model="claude-sonnet-4-5-20250929",
    betas=["extended-cache-ttl-2025-04-11"],
)
and specifying "cache_control": {"type": "ephemeral", "ttl": "1h"}.Details of cached token counts will be included on the InputTokenDetails of response’s usage_metadata:
import requests

from langchain.tools import tool
from langchain_anthropic import ChatAnthropic, convert_to_anthropic_tool

# Fetch a long document (the LangChain README, also used later in this guide)
get_response = requests.get(
    "https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md"
)
readme = get_response.text

# For demonstration purposes, we artificially expand the tool description.
description = (
    f"Get the weather at a location. By the way, check out this readme: {readme}"
)


@tool(description=description)
def get_weather(location: str) -> str:
    return "It's sunny."


# Enable caching on the tool
weather_tool = convert_to_anthropic_tool(get_weather)
weather_tool["cache_control"] = {"type": "ephemeral"}

model = ChatAnthropic(model="claude-sonnet-4-5-20250929")
model_with_tools = model.bind_tools([weather_tool])

query = "What's the weather in San Francisco?"

response_1 = model_with_tools.invoke(query)
response_2 = model_with_tools.invoke(query)

usage_1 = response_1.usage_metadata["input_token_details"]
usage_2 = response_2.usage_metadata["input_token_details"]

print(f"First invocation:\n{usage_1}")
print(f"\nSecond:\n{usage_2}")
First invocation:
{'cache_read': 0, 'cache_creation': 1809}

Second:
{'cache_read': 1809, 'cache_creation': 0}
Prompt caching can be used in multi-turn conversations to maintain context from earlier messages without redundant processing. We can enable incremental caching by marking the final message with cache_control; Claude will then automatically reuse the longest previously cached prefix for follow-up messages.

Below, we implement a simple chatbot that incorporates this feature. We follow the LangChain chatbot tutorial, but add a custom reducer that automatically marks the last content block in each user message with cache_control. See below:
import requests
from langchain_anthropic import ChatAnthropic
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph, add_messages
from typing_extensions import Annotated, TypedDict

model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

# Pull LangChain readme
get_response = requests.get(
    "https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md"
)
readme = get_response.text


def messages_reducer(left: list, right: list) -> list:
    # Update last user message
    for i in range(len(right) - 1, -1, -1):
        if right[i].type == "human":
            right[i].content[-1]["cache_control"] = {"type": "ephemeral"}
            break
    return add_messages(left, right)


class State(TypedDict):
    messages: Annotated[list, messages_reducer]


workflow = StateGraph(state_schema=State)


# Define the function that calls the model
def call_model(state: State):
    response = model.invoke(state["messages"])
    return {"messages": [response]}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)
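The first turn of the conversation is not reproduced above; a minimal sketch of it follows, matching the chatbot tutorial (the greeting text and thread_id are assumed). It also defines the HumanMessage import and config used by the later turns:

from langchain_core.messages import HumanMessage

config = {"configurable": {"thread_id": "1"}}

query = "Hi! I'm Bob."
input_message = HumanMessage([{"type": "text", "text": query}])
output = app.invoke({"messages": [input_message]}, config)
output["messages"][-1].pretty_print()
print(f"\n{output['messages'][-1].usage_metadata['input_token_details']}")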
================================== Ai Message ==================================

Hello, Bob! It's nice to meet you. How are you doing today? Is there something I can help you with?

{'cache_read': 0, 'cache_creation': 0}
query = f"Check out this readme: {readme}"input_message = HumanMessage([{"type": "text", "text": query}])output = app.invoke({"messages": [input_message]}, config)output["messages"][-1].pretty_print()print(f"\n{output['messages'][-1].usage_metadata['input_token_details']}")
================================== Ai Message ==================================

I can see you've shared the README from the LangChain GitHub repository. This is the documentation for LangChain, which is a popular framework for building applications powered by Large Language Models (LLMs). Here's a summary of what the README contains:

LangChain is:
- A framework for developing LLM-powered applications
- Helps chain together components and integrations to simplify AI application development
- Provides a standard interface for models, embeddings, vector stores, etc.

Key features/benefits:
- Real-time data augmentation (connect LLMs to diverse data sources)
- Model interoperability (swap models easily as needed)
- Large ecosystem of integrations

The LangChain ecosystem includes:
- LangSmith - For evaluations and observability
- LangGraph - For building complex agents with customizable architecture
- LangSmith - For deployment and scaling of agents

The README also mentions installation instructions (`pip install -U langchain`) and links to various resources including tutorials, how-to guides, conceptual guides, and API references.

Is there anything specific about LangChain you'd like to know more about, Bob?

{'cache_read': 0, 'cache_creation': 1498}
query = "What was my name again?"input_message = HumanMessage([{"type": "text", "text": query}])output = app.invoke({"messages": [input_message]}, config)output["messages"][-1].pretty_print()print(f"\n{output['messages'][-1].usage_metadata['input_token_details']}")
================================== Ai Message ==================================

Your name is Bob. You introduced yourself at the beginning of our conversation.

{'cache_read': 1498, 'cache_creation': 269}
In the LangSmith trace, toggling “raw output” will show exactly what messages are sent to the chat model, including cache_control keys.
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(model="claude-haiku-4-5-20251001")

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "document",
                "source": {
                    "type": "text",
                    "media_type": "text/plain",
                    "data": "The grass is green. The sky is blue.",
                },
                "title": "My Document",
                "context": "This is a trustworthy document.",
                "citations": {"enabled": True},
            },
            {"type": "text", "text": "What color is the grass and sky?"},
        ],
    }
]
response = model.invoke(messages)
response.content
[{'text': 'Based on the document, ', 'type': 'text'}, {'text': 'the grass is green', 'type': 'text', 'citations': [{'type': 'char_location', 'cited_text': 'The grass is green. ', 'document_index': 0, 'document_title': 'My Document', 'start_char_index': 0, 'end_char_index': 20}]}, {'text': ', and ', 'type': 'text'}, {'text': 'the sky is blue', 'type': 'text', 'citations': [{'type': 'char_location', 'cited_text': 'The sky is blue.', 'document_index': 0, 'document_title': 'My Document', 'start_char_index': 20, 'end_char_index': 36}]}, {'text': '.', 'type': 'text'}]
Claude supports a search_result content block, which represents citable results from queries against a knowledge base or other custom source. These blocks can be passed to Claude both at the top level (as in the example above) and within tool results, which lets Claude cite elements of its response based on the results of tool calls.

To pass search results in response to tool calls, define a tool that returns a list of search_result content blocks in Anthropic's native format. For example:
def retrieval_tool(query: str) -> list[dict]:
    """Access my knowledge base."""

    # Run a search (e.g., with a LangChain vector store)
    results = vector_store.similarity_search(query=query, k=2)

    # Package results into search_result blocks
    return [
        {
            "type": "search_result",
            # Customize fields as desired, using document metadata or otherwise
            "title": "My Document Title",
            "source": "Source description or provenance",
            "citations": {"enabled": True},
            "content": [{"type": "text", "text": doc.page_content}],
        }
        for doc in results
    ]
End-to-end example with LangGraph
Here we demonstrate an end-to-end example in which we populate a LangChain vector store with sample documents and equip Claude with a tool that queries those documents.
The tool here takes a search query and a category string literal, but any valid tool signature can be used.
from typing import Literal

from langchain.chat_models import init_chat_model
from langchain.embeddings import init_embeddings
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langgraph.checkpoint.memory import InMemorySaver
from langchain.agents import create_agent

# Set up vector store
embeddings = init_embeddings("openai:text-embedding-3-small")
vector_store = InMemoryVectorStore(embeddings)

document_1 = Document(
    id="1",
    page_content=(
        "To request vacation days, submit a leave request form through the "
        "HR portal. Approval will be sent by email."
    ),
    metadata={
        "category": "HR Policy",
        "doc_title": "Leave Policy",
        "provenance": "Leave Policy - page 1",
    },
)

document_2 = Document(
    id="2",
    page_content="Managers will review vacation requests within 3 business days.",
    metadata={
        "category": "HR Policy",
        "doc_title": "Leave Policy",
        "provenance": "Leave Policy - page 2",
    },
)

document_3 = Document(
    id="3",
    page_content=(
        "Employees with over 6 months tenure are eligible for 20 paid vacation days "
        "per year."
    ),
    metadata={
        "category": "Benefits Policy",
        "doc_title": "Benefits Guide 2025",
        "provenance": "Benefits Policy - page 1",
    },
)

documents = [document_1, document_2, document_3]
vector_store.add_documents(documents=documents)


# Define tool
async def retrieval_tool(
    query: str, category: Literal["HR Policy", "Benefits Policy"]
) -> list[dict]:
    """Access my knowledge base."""

    def _filter_function(doc: Document) -> bool:
        return doc.metadata.get("category") == category

    results = vector_store.similarity_search(
        query=query, k=2, filter=_filter_function
    )
    return [
        {
            "type": "search_result",
            "title": doc.metadata["doc_title"],
            "source": doc.metadata["provenance"],
            "citations": {"enabled": True},
            "content": [{"type": "text", "text": doc.page_content}],
        }
        for doc in results
    ]


# Create agent
model = init_chat_model("claude-haiku-4-5-20251001")

checkpointer = InMemorySaver()
agent = create_agent(model, [retrieval_tool], checkpointer=checkpointer)

# Invoke on a query
config = {"configurable": {"thread_id": "session_1"}}
input_message = {
    "role": "user",
    "content": "How do I request vacation days?",
}
async for step in agent.astream(
    {"messages": [input_message]},
    config,
    stream_mode="values",
):
    step["messages"][-1].pretty_print()
Anthropic also lets you specify your own splits using custom document types. LangChain text splitters can be used to generate meaningful splits for this purpose. See the below example, where we split the LangChain README (a markdown document) and pass it to Claude as context:
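The original example is not reproduced here; the sketch below shows one way it might look, assuming the README text fetched earlier is available as readme and splitting it with a MarkdownTextSplitter (the chunk size and the question are illustrative assumptions):

from langchain_anthropic import ChatAnthropic
from langchain_text_splitters import MarkdownTextSplitter

# Split the markdown README into chunks that will serve as citable units
splitter = MarkdownTextSplitter(chunk_size=500, chunk_overlap=0)
chunks = splitter.split_text(readme)

# Package the splits as a custom-content document so Claude cites whole chunks
document = {
    "type": "document",
    "source": {
        "type": "content",
        "content": [{"type": "text", "text": chunk} for chunk in chunks],
    },
    "title": "LangChain README",
    "citations": {"enabled": True},
}

model = ChatAnthropic(model="claude-haiku-4-5-20251001")
response = model.invoke(
    [
        {
            "role": "user",
            "content": [
                document,
                {"type": "text", "text": "What tools does LangChain provide?"},
            ],
        }
    ]
)
response.content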
The web search tool requires langchain-anthropic>=0.3.13
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

tool = {"type": "web_search_20250305", "name": "web_search", "max_uses": 3}
model_with_tools = model.bind_tools([tool])

response = model_with_tools.invoke("How do I update a web app to TypeScript 5.5?")
Code execution requires langchain-anthropic>=0.3.14
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(
    model="claude-sonnet-4-5-20250929",
    betas=["code-execution-2025-05-22"],
)

tool = {"type": "code_execution_20250522", "name": "code_execution"}
model_with_tools = model.bind_tools([tool])

response = model_with_tools.invoke(
    "Calculate the mean and standard deviation of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
)
Use with Files API
Using the Files API, Claude can write code that accesses uploaded files for data analysis and other purposes. See the example below:
# Upload file
import anthropic

client = anthropic.Anthropic()
file = client.beta.files.upload(
    file=open("/path/to/sample_data.csv", "rb"),
)
file_id = file.id

# Run inference
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(
    model="claude-sonnet-4-5-20250929",
    betas=["code-execution-2025-05-22"],
)

tool = {"type": "code_execution_20250522", "name": "code_execution"}
model_with_tools = model.bind_tools([tool])

input_message = {
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "Please plot these data and tell me what you see.",
        },
        {
            "type": "container_upload",
            "file_id": file_id,
        },
    ],
}
response = model_with_tools.invoke([input_message])
Note that Claude may generate files as part of its code execution. You can access these files using the Files API:
# Take all file outputs for demonstration purposes
file_ids = []
for block in response.content:
    if block["type"] == "code_execution_tool_result":
        file_ids.extend(
            content["file_id"]
            for content in block.get("content", {}).get("content", [])
            if "file_id" in content
        )

for i, file_id in enumerate(file_ids):
    file_content = client.beta.files.download(file_id)
    file_content.write_to_file(f"/path/to/file_{i}.png")
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

tool = {"type": "text_editor_20250124", "name": "str_replace_editor"}
model_with_tools = model.bind_tools([tool])

response = model_with_tools.invoke(
    "There's a syntax error in my primes.py file. Can you help me fix it?"
)

print(response.text)
response.tool_calls
I'd be happy to help you fix the syntax error in your primes.py file. First, let's look at the current content of the file to identify the error.