Examples
The following examples demonstrate common use cases. All examples assume you have phileas-python installed:
pip install phileas-redact
Redact email addresses
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
policy = Policy.from_dict({
"name": "email-redact",
"identifiers": {
"emailAddress": {
"emailAddressFilterStrategies": [{"strategy": "REDACT"}]
}
}
})
service = FilterService()
result = service.filter(
policy, "app", "doc-1",
"Please contact john.doe@example.com or support@company.org for help."
)
print(result.filtered_text)
# Please contact {{{REDACTED-email-address}}} or {{{REDACTED-email-address}}} for help.
Mask credit card numbers (show last 4 digits)
policy = Policy.from_dict({
"name": "cc-last4",
"identifiers": {
"creditCard": {
"creditCardFilterStrategies": [{"strategy": "LAST_4"}]
}
}
})
result = service.filter(policy, "app", "doc-2", "Charged to card 4111 1111 1111 1111.")
print(result.filtered_text)
# Charged to card ************1111.
Hash Social Security Numbers
Replace an SSN with its SHA-256 hash to maintain referential consistency without exposing the value.
policy = Policy.from_dict({
"name": "ssn-hash",
"identifiers": {
"ssn": {
"ssnFilterStrategies": [{"strategy": "HASH_SHA256_REPLACE"}]
}
}
})
result = service.filter(policy, "app", "doc-3", "SSN: 123-45-6789")
print(result.filtered_text)
# SSN: 01a54629efb952287e554eb23ef69c52097a75aecc0e3a93ca0855ab6d7a31a0
Use a custom redaction format
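The redactionFormat option sets the replacement text. A %t placeholder in the format is replaced with the filter type name; a format without %t, as in this example, is used verbatim.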
policy = Policy.from_dict({
"name": "custom-format",
"identifiers": {
"phoneNumber": {
"phoneNumberFilterStrategies": [
{"strategy": "REDACT", "redactionFormat": "[PHONE-REDACTED]"}
]
}
}
})
result = service.filter(policy, "app", "doc-4", "Call me at 555-867-5309.")
print(result.filtered_text)
# Call me at [PHONE-REDACTED].
Replace with a static value
policy = Policy.from_dict({
"name": "static-ip",
"identifiers": {
"ipAddress": {
"ipAddressFilterStrategies": [
{"strategy": "STATIC_REPLACE", "staticReplacement": "0.0.0.0"}
]
}
}
})
result = service.filter(policy, "app", "doc-5", "Server at 10.0.0.42 is down.")
print(result.filtered_text)
# Server at 0.0.0.0 is down.
Shift a date forward
Move detected dates forward by 2 years and 15 days for de-identification.
policy = Policy.from_dict({
"name": "date-shift",
"identifiers": {
"date": {
"dateFilterStrategies": [
{"strategy": "SHIFT_DATE", "shiftYears": 2, "shiftDays": 15}
]
}
}
})
result = service.filter(policy, "app", "doc-6", "Born on 01/15/1990.")
print(result.filtered_text)
# Born on 01/30/1992.
Filter multiple PII types at once
policy = Policy.from_dict({
"name": "multi",
"identifiers": {
"emailAddress": {
"emailAddressFilterStrategies": [{"strategy": "REDACT"}]
},
"ssn": {
"ssnFilterStrategies": [{"strategy": "REDACT"}]
},
"phoneNumber": {
"phoneNumberFilterStrategies": [{"strategy": "MASK"}]
},
"creditCard": {
"creditCardFilterStrategies": [{"strategy": "LAST_4"}]
}
}
})
text = (
"Name: Jane Smith, SSN: 987-65-4321, "
"Phone: (555) 123-4567, Email: jane@example.com, "
"Card: 5500 0000 0000 0004."
)
result = service.filter(policy, "app", "doc-7", text)
print(result.filtered_text)
# Name: Jane Smith, SSN: {{{REDACTED-ssn}}}, Phone: (***) ***-****,
# Email: {{{REDACTED-email-address}}}, Card: ************0004.
Inspect spans
Each Span in result.spans describes a single match:
for span in result.spans:
print(
f"[{span.filter_type:20s}] "
f"chars {span.character_start:3d}–{span.character_end:3d} "
f"'{span.text}' → '{span.replacement}' "
f"(confidence={span.confidence:.2f})"
)
Skip specific values with an ignore list
Prevent specific values from being redacted even when they match a filter pattern.
policy = Policy.from_dict({
"name": "ignored-emails",
"identifiers": {
"emailAddress": {
"emailAddressFilterStrategies": [{"strategy": "REDACT"}],
"ignored": ["noreply@internal.com", "admin@internal.com"]
}
}
})
result = service.filter(
policy, "app", "doc-8",
"Contact noreply@internal.com or john@example.com."
)
print(result.filtered_text)
# Contact noreply@internal.com or {{{REDACTED-email-address}}}.
Skip values matching a regex pattern
Use ignoredPatterns at the top level to exclude matches by pattern.
policy = Policy.from_dict({
"name": "pattern-ignore",
"identifiers": {
"phoneNumber": {
"phoneNumberFilterStrategies": [{"strategy": "REDACT"}]
}
},
"ignoredPatterns": ["\\d{3}-555-\\d{4}"] # keep 555 numbers unchanged
})
result = service.filter(
policy, "app", "doc-9",
"Call 555-555-1234 (public) or 800-867-5309 (private)."
)
print(result.filtered_text)
# Call 555-555-1234 (public) or {{{REDACTED-phone-number}}} (private).
Redact ZIP codes based on population
Use the population condition to redact only ZIP codes whose 2020 US Census population falls below (or above) a threshold. ZIP codes not found in the dataset are not redacted.
policy = Policy.from_dict({
"name": "zip-population",
"identifiers": {
"zipCode": {
"zipCodeFilterStrategies": [
{"strategy": "REDACT", "condition": "population < 20000"}
]
}
}
})
result = service.filter(
policy, "app", "doc-zip",
"Offices in 90210 and 10001."
)
print(result.filtered_text)
# Offices in {{{REDACTED-zip-code}}} and 10001.
# (90210: population in the dataset is below the 20,000 threshold → redacted;
# 10001: population ≈ 32,612 — above the threshold → kept)
Multiple strategies can handle different population ranges differently:
policy = Policy.from_dict({
"name": "zip-tiered",
"identifiers": {
"zipCode": {
"zipCodeFilterStrategies": [
{"strategy": "REDACT", "condition": "population < 20000"},
{"strategy": "STATIC_REPLACE",
"staticReplacement": "[LARGE-ZIP]", "condition": "population >= 20000"},
]
}
}
})
Load a policy from a JSON file
import json
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
with open("policy.json") as f:
policy = Policy.from_json(f.read())
service = FilterService()
result = service.filter(policy, "app", "doc-10", "Text to filter...")
Load a policy from a YAML file
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
with open("policy.yaml") as f:
policy = Policy.from_yaml(f.read())
service = FilterService()
result = service.filter(policy, "app", "doc-11", "Text to filter...")
An example policy.yaml:
name: my-policy
identifiers:
emailAddress:
emailAddressFilterStrategies:
- strategy: REDACT
redactionFormat: "{{{REDACTED-%t}}}"
ssn:
ssnFilterStrategies:
- strategy: MASK
ignored:
- admin@example.com
ignoredPatterns:
- "\\d{3}-555-\\d{4}"
NER-based person detection with ph-eye
Requires a running ph-eye service.
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
policy = Policy.from_dict({
"name": "ner-demo",
"identifiers": {
"phEye": [
{
"endpoint": "http://localhost:8080",
"labels": ["PERSON"],
"thresholds": {"PERSON": 0.85},
"phEyeFilterStrategies": [{"strategy": "REDACT"}]
}
]
}
})
service = FilterService()
result = service.filter(
policy, "app", "doc-12",
"Dr. Alice Johnson reviewed the case."
)
print(result.filtered_text)
# Dr. {{{REDACTED-person}}} reviewed the case.
Redact custom patterns with regex
Use custom pattern filters to detect domain-specific PII not covered by built-in filters.
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
policy = Policy.from_dict({
"name": "custom-patterns",
"identifiers": {
"patterns": [
{
"pattern": r"EMP-\d{6}",
"label": "employee-id",
"patternFilterStrategies": [{"strategy": "REDACT"}]
},
{
"pattern": r"[A-Z]{2}\d{6}",
"label": "passport-number",
"patternFilterStrategies": [{"strategy": "MASK"}]
}
]
}
})
service = FilterService()
result = service.filter(
policy, "app", "doc-13",
"Employee EMP-123456 has passport AB123456."
)
print(result.filtered_text)
# Employee {{{REDACTED-employee-id}}} has passport ********.
Redact terms from a dictionary
Use the dictionaries filter to redact known names or sensitive terms.
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
policy = Policy.from_dict({
"name": "dictionary-demo",
"identifiers": {
"dictionaries": [
{
"terms": ["John Smith", "Jane Doe", "confidential", "proprietary"],
"dictionaryFilterStrategies": [{"strategy": "REDACT"}]
}
]
}
})
service = FilterService()
result = service.filter(
policy, "app", "doc-14",
"John Smith shared confidential data with Jane Doe about the proprietary algorithm."
)
print(result.filtered_text)
# {{{REDACTED-dictionary}}} shared {{{REDACTED-dictionary}}} data with {{{REDACTED-dictionary}}} about the {{{REDACTED-dictionary}}} algorithm.
Use conditions to filter selectively
Apply different strategies based on the matched value using condition expressions.
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
policy = Policy.from_dict({
"name": "conditional-filtering",
"identifiers": {
"phoneNumber": {
"phoneNumberFilterStrategies": [
# Redact phone numbers starting with 555 (test numbers)
{"strategy": "REDACT", "condition": 'token startswith "555"'},
# Mask all other phone numbers
{"strategy": "MASK"}
]
}
}
})
service = FilterService()
result = service.filter(
policy, "app", "doc-15",
"Test: 555-123-4567, Real: 800-867-5309"
)
print(result.filtered_text)
# Test: {{{REDACTED-phone-number}}}, Real: ***-***-****
Maintain referential integrity across documents
Use contexts to ensure the same PII value gets the same replacement across multiple documents.
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService
policy = Policy.from_dict({
"name": "context-demo",
"identifiers": {
"emailAddress": {
"emailAddressFilterStrategies": [{"strategy": "HASH_SHA256_REPLACE"}]
}
}
})
service = FilterService()
# Filter multiple documents in the same context
doc1 = service.filter(
policy, "patient-123", "note-1",
"Patient emailed from john@example.com"
)
doc2 = service.filter(
policy, "patient-123", "note-2",
"Follow-up: john@example.com responded"
)
# The hash will be identical in both documents
print(doc1.filtered_text)
# Patient emailed from 5bb8a5cbf6...
print(doc2.filtered_text)
# Follow-up: 5bb8a5cbf6... responded
# Different context = different hash
doc3 = service.filter(
policy, "patient-456", "note-1",
"Patient emailed from john@example.com"
)
# Hash will be different in patient-456 context