Build Q&A systems with the Claude API in Python. FAQ bots, document Q&A, context-grounded answering, confidence scoring, and unanswerable detection — complete working examples.
Claude excels at question answering: it grounds answers in provided context, detects unanswerable questions, and returns structured output suitable for production Q&A pipelines. This guide covers FAQ bots, document Q&A, confidence scoring, and multi-question batch answering.
pip install anthropic
import anthropic
# Shared API client reused by every example function below.
# NOTE(review): presumably the SDK picks up the API key from the
# ANTHROPIC_API_KEY environment variable when none is passed — confirm against the SDK docs.
client = anthropic.Anthropic()
def answer(question: str, context: str) -> str:
    """Answer ``question`` using only the supplied ``context``.

    The system prompt tells the model to reply with the fixed sentence
    "I cannot find this in the provided information." when the context
    does not contain the answer.

    Args:
        question: The question to answer.
        context: Reference text the answer must be grounded in.

    Returns:
        The text of the first content block of the model's reply.
    """
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        system=(
            "Answer the question using only the provided context. "
            "If the answer is not in the context, say: 'I cannot find this in the provided information.'"
        ),
        messages=[{
            "role": "user",
            # Line breaks are written as "\n" escapes: a single-quoted string
            # literal cannot span physical lines.
            "content": f"Context:\n{context}\nQuestion: {question}",
        }],
    )
    return message.content[0].text
# Sample context shared by the examples that follow.
context = """
Anthropic was founded in 2021 by Dario Amodei, Daniela Amodei, and other former OpenAI researchers.
The company is headquartered in San Francisco. Claude is Anthropic's AI assistant, first released in 2023.
"""

# The founding year is stated in the context, so this question is answerable.
print(answer(question="When was Anthropic founded?", context=context))
# Example output: "Anthropic was founded in 2021."

# The stock price is not mentioned in the context, so the fallback sentence is expected.
print(answer(question="What is Anthropic's stock price?", context=context))
# Example output: "I cannot find this in the provided information."
import json
def answer_structured(question: str, context: str) -> dict:
    """Answer ``question`` from ``context`` and return the reply parsed as JSON.

    The system prompt asks for a JSON object with the keys ``answer``
    (string or null), ``answerable`` (boolean), ``confidence``
    ('high' | 'medium' | 'low') and ``source_quote`` (verbatim excerpt or
    null), without markdown fences. The reply is not validated against that
    schema; it is only parsed.

    Args:
        question: The question to answer.
        context: Reference text the answer must be grounded in.

    Returns:
        The object produced by parsing the first content block of the reply.

    Raises:
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=512,
        system=(
            "Answer questions from the provided context. "
            "Return a JSON object with keys: "
            "'answer' (string, or null if unanswerable), "
            "'answerable' (boolean), "
            "'confidence' ('high'|'medium'|'low'), "
            "'source_quote' (verbatim excerpt from context supporting the answer, or null). "
            "No markdown fences."
        ),
        messages=[{
            "role": "user",
            # Line breaks are written as "\n" escapes: a single-quoted string
            # literal cannot span physical lines.
            "content": f"Context:\n{context}\nQuestion: {question}",
        }],
    )
    return json.loads(message.content[0].text)
result = answer_structured(question="Who founded Anthropic?", context=context)
# Illustrative JSON reply before parsing (exact wording may vary between runs):
# {"answer": "Dario Amodei, Daniela Amodei, and other former OpenAI researchers",
# "answerable": true, "confidence": "high",
# "source_quote": "Anthropic was founded in 2021 by Dario Amodei, Daniela Amodei, and other former OpenAI researchers."}
def answer_multiple(questions: list[str], context: str) -> list[dict]:
    """Answer several questions about one context in a single request.

    The questions are sent as a numbered list. The system prompt asks for a
    JSON array of ``{question, answer, answerable}`` objects, with
    ``answerable=false`` and ``answer=null`` for questions the context does
    not cover. The reply is parsed but not validated against that shape.

    Args:
        questions: Questions to answer; may be empty.
        context: Reference text the answers must be grounded in.

    Returns:
        The parsed JSON reply, or an empty list when ``questions`` is empty.

    Raises:
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    if not questions:
        # Nothing to ask: skip the API round trip entirely.
        return []

    qs_formatted = "\n".join(
        f"{number}. {q}" for number, q in enumerate(questions, start=1)
    )
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=2048,
        system=(
            "Answer each question from the provided context. "
            "Return a JSON array of objects: [{question, answer, answerable}]. "
            "Set answerable=false and answer=null if the context doesn't contain the answer. "
            "No markdown fences."
        ),
        messages=[{
            "role": "user",
            # Line breaks are written as "\n" escapes: a single-quoted string
            # literal cannot span physical lines.
            "content": f"Context:\n{context}\nQuestions:\n{qs_formatted}",
        }],
    )
    return json.loads(message.content[0].text)
results = answer_multiple(
    questions=[
        "When was Anthropic founded?",
        "Who is the CEO?",
        "What is their revenue?",
    ],
    context=context,
)
# Illustrative JSON reply before parsing (exact wording may vary between runs).
# The sample context names Dario Amodei only as a founder, never as CEO, so a
# strictly grounded reply marks that question unanswerable:
# [{"question": "When was Anthropic founded?", "answer": "2021", "answerable": true},
# {"question": "Who is the CEO?", "answer": null, "answerable": false},
# {"question": "What is their revenue?", "answer": null, "answerable": false}]
class FAQBot:
    """Multi-turn FAQ assistant that answers from a fixed knowledge base.

    The conversation is stored in ``history`` as a list of
    ``{"role", "content"}`` messages. Each call to :meth:`ask` sends at most
    the ``max_turns`` most recent messages (user and assistant messages are
    counted individually, so 20 covers roughly 10 exchanges).
    """

    def __init__(self, knowledge_base: str, max_turns: int = 20):
        """Create a bot with an empty conversation.

        Args:
            knowledge_base: Text the answers must be grounded in.
            max_turns: Maximum number of most recent messages sent per request.

        Raises:
            ValueError: If ``max_turns`` is smaller than 1.
        """
        if max_turns < 1:
            # A window of 0 would turn the slice into [-0:], i.e. the whole
            # history, silently disabling the limit.
            raise ValueError("max_turns must be at least 1")
        self.knowledge_base = knowledge_base
        self.max_turns = max_turns
        self.history: list[dict] = []

    def _build_messages(self, question: str) -> list[dict]:
        """Return the message window to send for ``question`` without mutating history."""
        pending = {"role": "user", "content": question}
        window = [*self.history, pending][-self.max_turns:]
        # Trimming can cut the window so that it begins with an assistant
        # reply; drop such leading messages so the conversation always opens
        # with a user message, as the Messages API expects. The pending
        # question is a user message, so a match always exists.
        first_user = next(
            index for index, turn in enumerate(window) if turn["role"] == "user"
        )
        return window[first_user:]

    def ask(self, question: str) -> str:
        """Send ``question`` with recent history and return the reply text.

        The question and the reply are appended to ``history`` only after the
        API call succeeds, so a failed request leaves the history unchanged.

        Args:
            question: The user's question.

        Returns:
            The text of the first content block of the model's reply.
        """
        messages = self._build_messages(question)
        message = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=(
                "You are a helpful FAQ assistant. Answer questions using only the knowledge base below. "
                "If the answer is not in the knowledge base, say so clearly.\n"
                f"Knowledge base:\n{self.knowledge_base}"
            ),
            messages=messages,
        )
        reply = message.content[0].text
        self.history.append(messages[-1])
        self.history.append({"role": "assistant", "content": reply})
        return reply
# Usage: a small return-policy knowledge base and two consecutive questions.
kb = (
    "Our return policy: 30-day returns for unused items. Electronics: 15-day return window. No returns on digital downloads."
)
bot = FAQBot(knowledge_base=kb)

print(bot.ask(question="Can I return a laptop after 20 days?"))
# Example output: "Based on our policy, electronics have a 15-day return window, so a 20-day return would not be accepted."

# Follow-up question: the bot sends the earlier exchange along with it.
print(bot.ask(question="What about software I downloaded?"))
# Example output: "Digital downloads cannot be returned per our policy."
def answer_from_document(question: str, document_text: str, *, max_chars: int = 150_000) -> dict:
    """Answer ``question`` from a single document and return the parsed JSON reply.

    The document is truncated to its first ``max_chars`` characters before
    being sent. The system prompt asks for a JSON object with ``answer``,
    ``answerable`` and ``page_hint`` (approximate section/paragraph holding
    the answer), without markdown fences. The reply is parsed but not
    validated against that shape.

    Args:
        question: The question to answer.
        document_text: Full text of the document.
        max_chars: Character cap applied to ``document_text`` (default 150K,
            the original safety cap).

    Returns:
        The object produced by parsing the first content block of the reply.

    Raises:
        ValueError: If ``max_chars`` is negative.
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    if max_chars < 0:
        # A negative cap would slice characters off the END of the document.
        raise ValueError("max_chars must be non-negative")

    # Claude supports up to ~180K tokens context — suitable for most documents
    # For very large corpora (books, wikis), use RAG instead
    # NOTE(review): the original comment on the model line claimed "longer
    # context than Haiku" — confirm against the current model documentation.
    excerpt = document_text[:max_chars]
    message = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        system=(
            "You are a document analyst. Answer the question based solely on the document provided. "
            "Return JSON: {'answer': str|null, 'answerable': bool, 'page_hint': str|null}. "
            "In page_hint, reference the approximate section/paragraph containing the answer. "
            "No markdown fences."
        ),
        messages=[{
            "role": "user",
            # Line breaks are written as "\n" escapes: a single-quoted string
            # literal cannot span physical lines.
            "content": f"Document:\n{excerpt}\nQuestion: {question}",
        }],
    )
    return json.loads(message.content[0].text)
| Approach | Best for | Document size limit | Latency |
|---|---|---|---|
| Full-context Q&A (this guide) | Single documents, FAQs | ~180K tokens (~140K words) | 1–3s |
| RAG (vector DB + Claude) | Large corpora, real-time KB | Unlimited | 1–4s |
| Batch multi-question | Offline document analysis | ~180K tokens | Async |
| Stateful chatbot | Multi-turn FAQ support | Last ~20 messages (≈10 exchanges) in context | 1–3s/turn |
For large corpora requiring retrieval, see the RAG with Claude guide. To estimate Q&A pipeline costs before building, use the Claude API Cost Calculator.