Tutorial 4: Guardrails and Safety
Protect your agents from PII leaks, prompt injection, and hallucinations.
Quick Setup
from largestack import Agent, create_guardrails
# Configure each guardrail through a create_guardrails keyword argument,
# then hand the result to the agent.
guardrails = create_guardrails(
    pii=True,  # Detect emails, SSN, credit cards
    injection=True,  # Block prompt injection
    hallucination=True,  # NLI faithfulness check (for RAG)
    toxicity=True,  # Block toxic content
    topic_blocklist=["politics", "religion"],
)
agent = Agent(name="safe-agent", guardrails=guardrails)
By Name
# Guardrails can also be passed as a list of string names.
guardrail_names = ["pii", "injection", "hallucination", "toxicity"]
agent = Agent(name="safe", guardrails=guardrail_names)
PII Redaction
from largestack._guard.pii import PIIGuard
guard = PIIGuard()
# Sample input containing an email address and an SSN-formatted number.
# (example.com is a domain reserved for documentation.)
text = "Email me at john@example.com, my SSN is 123-45-6789"
clean = guard.redact(text)
# Value of `clean` as documented by this tutorial:
# "Email me at [EMAIL_REDACTED], my SSN is [SSN_REDACTED]"
Steering Hooks
from largestack import steer_before_tool, proceed, interrupt
@steer_before_tool
def block_writes(tool_name, params, context):
    """Interrupt the listed tools and let every other tool proceed.

    Args:
        tool_name: Name of the tool being checked against the restricted list.
        params: Accepted but not inspected by this hook.
        context: Accepted but not inspected by this hook.

    Returns:
        The result of ``interrupt(...)`` when ``tool_name`` is one of the
        restricted tools, otherwise the result of ``proceed()``.
    """
    restricted_tools = ("write_file", "shell_command", "database_query")
    if tool_name not in restricted_tools:
        return proceed()
    return interrupt("Write operations not allowed in this mode")
# Attach the hook to an agent through the `steering` list.
agent = Agent(name="read-only", steering=[block_writes])
Kill Switch
# NOTE(review): this line is a shell command, not Python (presumably the largestack CLI; confirm).
largestack resume # Resume after emergency stop
from largestack._guard.kill_switch import activate, deactivate
# Programmatic kill switch. The string passed to activate() looks like a
# free-text reason for the stop (TODO confirm against the API).
activate("security incident") # All agents halt immediately
deactivate() # Resume