Examples

The following examples demonstrate common use cases. All examples assume you have phileas-python installed (the package is distributed under the name phileas-redact):

pip install phileas-redact

Redact email addresses

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

policy = Policy.from_dict({
    "name": "email-redact",
    "identifiers": {
        "emailAddress": {
            "emailAddressFilterStrategies": [{"strategy": "REDACT"}]
        }
    }
})

service = FilterService()
result = service.filter(
    policy, "app", "doc-1",
    "Please contact john.doe@example.com or support@company.org for help."
)
print(result.filtered_text)
# Please contact {{{REDACTED-email-address}}} or {{{REDACTED-email-address}}} for help.

Mask credit card numbers (show last 4 digits)

policy = Policy.from_dict({
    "name": "cc-last4",
    "identifiers": {
        "creditCard": {
            "creditCardFilterStrategies": [{"strategy": "LAST_4"}]
        }
    }
})

result = service.filter(policy, "app", "doc-2", "Charged to card 4111 1111 1111 1111.")
print(result.filtered_text)
# Charged to card ************1111.

Hash Social Security Numbers

Replace an SSN with its SHA-256 hash to maintain referential consistency without exposing the value.

policy = Policy.from_dict({
    "name": "ssn-hash",
    "identifiers": {
        "ssn": {
            "ssnFilterStrategies": [{"strategy": "HASH_SHA256_REPLACE"}]
        }
    }
})

result = service.filter(policy, "app", "doc-3", "SSN: 123-45-6789")
print(result.filtered_text)
# SSN: 01a54629efb952287e554eb23ef69c52097a75aecc0e3a93ca0855ab6d7a31a0

Use a custom redaction format

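The %t placeholder in redactionFormat is replaced with the filter type name (for example, "{{{REDACTED-%t}}}" produces {{{REDACTED-phone-number}}} for a phone number). A format that contains no %t placeholder, such as the one below, is used as the replacement text verbatim.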

policy = Policy.from_dict({
    "name": "custom-format",
    "identifiers": {
        "phoneNumber": {
            "phoneNumberFilterStrategies": [
                {"strategy": "REDACT", "redactionFormat": "[PHONE-REDACTED]"}
            ]
        }
    }
})

result = service.filter(policy, "app", "doc-4", "Call me at 555-867-5309.")
print(result.filtered_text)
# Call me at [PHONE-REDACTED].

Replace with a static value

policy = Policy.from_dict({
    "name": "static-ip",
    "identifiers": {
        "ipAddress": {
            "ipAddressFilterStrategies": [
                {"strategy": "STATIC_REPLACE", "staticReplacement": "0.0.0.0"}
            ]
        }
    }
})

result = service.filter(policy, "app", "doc-5", "Server at 10.0.0.42 is down.")
print(result.filtered_text)
# Server at 0.0.0.0 is down.

Shift a date forward

Move detected dates forward by 2 years and 15 days for de-identification.

policy = Policy.from_dict({
    "name": "date-shift",
    "identifiers": {
        "date": {
            "dateFilterStrategies": [
                {"strategy": "SHIFT_DATE", "shiftYears": 2, "shiftDays": 15}
            ]
        }
    }
})

result = service.filter(policy, "app", "doc-6", "Born on 01/15/1990.")
print(result.filtered_text)
# Born on 01/30/1992.

Filter multiple PII types at once

policy = Policy.from_dict({
    "name": "multi",
    "identifiers": {
        "emailAddress": {
            "emailAddressFilterStrategies": [{"strategy": "REDACT"}]
        },
        "ssn": {
            "ssnFilterStrategies": [{"strategy": "REDACT"}]
        },
        "phoneNumber": {
            "phoneNumberFilterStrategies": [{"strategy": "MASK"}]
        },
        "creditCard": {
            "creditCardFilterStrategies": [{"strategy": "LAST_4"}]
        }
    }
})

text = (
    "Name: Jane Smith, SSN: 987-65-4321, "
    "Phone: (555) 123-4567, Email: jane@example.com, "
    "Card: 5500 0000 0000 0004."
)

result = service.filter(policy, "app", "doc-7", text)
print(result.filtered_text)
# Name: Jane Smith, SSN: {{{REDACTED-ssn}}}, Phone: (***) ***-****, 
# Email: {{{REDACTED-email-address}}}, Card: ************0004.

Inspect spans

Each Span in result.spans describes a single match:

for span in result.spans:
    print(
        f"[{span.filter_type:20s}] "
        f"chars {span.character_start:3d}–{span.character_end:3d}  "
        f"'{span.text}'  →  '{span.replacement}'  "
        f"(confidence={span.confidence:.2f})"
    )

Skip specific values with an ignore list

Prevent specific values from being redacted even when they match a filter pattern.

policy = Policy.from_dict({
    "name": "ignored-emails",
    "identifiers": {
        "emailAddress": {
            "emailAddressFilterStrategies": [{"strategy": "REDACT"}],
            "ignored": ["noreply@internal.com", "admin@internal.com"]
        }
    }
})

result = service.filter(
    policy, "app", "doc-8",
    "Contact noreply@internal.com or john@example.com."
)
print(result.filtered_text)
# Contact noreply@internal.com or {{{REDACTED-email-address}}}.

Skip values matching a regex pattern

Use ignoredPatterns at the top level to exclude matches by pattern.

policy = Policy.from_dict({
    "name": "pattern-ignore",
    "identifiers": {
        "phoneNumber": {
            "phoneNumberFilterStrategies": [{"strategy": "REDACT"}]
        }
    },
    "ignoredPatterns": ["\\d{3}-555-\\d{4}"]  # keep 555 numbers unchanged
})

result = service.filter(
    policy, "app", "doc-9",
    "Call 555-555-1234 (public) or 800-867-5309 (private)."
)
print(result.filtered_text)
# Call 555-555-1234 (public) or {{{REDACTED-phone-number}}} (private).

Redact ZIP codes based on population

Use the population condition to redact only ZIP codes whose 2020 US Census population falls below (or above) a threshold. ZIP codes not found in the dataset are not redacted.

policy = Policy.from_dict({
    "name": "zip-population",
    "identifiers": {
        "zipCode": {
            "zipCodeFilterStrategies": [
                {"strategy": "REDACT", "condition": "population < 20000"}
            ]
        }
    }
})

result = service.filter(
    policy, "app", "doc-zip",
    "Offices in 90210 and 10001."
)
print(result.filtered_text)
# Offices in {{{REDACTED-zip-code}}} and 10001.
# (90210 population is below the 20,000 threshold → redacted;
#  10001 population ≈ 32,612 — above threshold → kept)

Multiple strategies can handle different population ranges differently:

policy = Policy.from_dict({
    "name": "zip-tiered",
    "identifiers": {
        "zipCode": {
            "zipCodeFilterStrategies": [
                {"strategy": "REDACT",             "condition": "population < 20000"},
                {"strategy": "STATIC_REPLACE",
                 "staticReplacement": "[LARGE-ZIP]", "condition": "population >= 20000"},
            ]
        }
    }
})

Load a policy from a JSON file

import json
from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

with open("policy.json") as f:
    policy = Policy.from_json(f.read())

service = FilterService()
result = service.filter(policy, "app", "doc-10", "Text to filter...")

Load a policy from a YAML file

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

with open("policy.yaml") as f:
    policy = Policy.from_yaml(f.read())

service = FilterService()
result = service.filter(policy, "app", "doc-11", "Text to filter...")

An example policy.yaml:

name: my-policy
identifiers:
  emailAddress:
    emailAddressFilterStrategies:
      - strategy: REDACT
        redactionFormat: "{{{REDACTED-%t}}}"
  ssn:
    ssnFilterStrategies:
      - strategy: MASK
ignored:
  - admin@example.com
ignoredPatterns:
  - "\\d{3}-555-\\d{4}"

NER-based person detection with ph-eye

Requires a running ph-eye service.

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

policy = Policy.from_dict({
    "name": "ner-demo",
    "identifiers": {
        "phEye": [
            {
                "endpoint": "http://localhost:8080",
                "labels": ["PERSON"],
                "thresholds": {"PERSON": 0.85},
                "phEyeFilterStrategies": [{"strategy": "REDACT"}]
            }
        ]
    }
})

service = FilterService()
result = service.filter(
    policy, "app", "doc-12",
    "Dr. Alice Johnson reviewed the case."
)
print(result.filtered_text)
# Dr. {{{REDACTED-person}}} reviewed the case.

Redact custom patterns with regex

Use custom pattern filters to detect domain-specific PII not covered by built-in filters.

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

policy = Policy.from_dict({
    "name": "custom-patterns",
    "identifiers": {
        "patterns": [
            {
                "pattern": r"EMP-\d{6}",
                "label": "employee-id",
                "patternFilterStrategies": [{"strategy": "REDACT"}]
            },
            {
                "pattern": r"[A-Z]{2}\d{6}",
                "label": "passport-number",
                "patternFilterStrategies": [{"strategy": "MASK"}]
            }
        ]
    }
})

service = FilterService()
result = service.filter(
    policy, "app", "doc-13",
    "Employee EMP-123456 has passport AB123456."
)
print(result.filtered_text)
# Employee {{{REDACTED-employee-id}}} has passport ********.

Redact terms from a dictionary

Use the dictionaries filter to redact known names or sensitive terms.

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

policy = Policy.from_dict({
    "name": "dictionary-demo",
    "identifiers": {
        "dictionaries": [
            {
                "terms": ["John Smith", "Jane Doe", "confidential", "proprietary"],
                "dictionaryFilterStrategies": [{"strategy": "REDACT"}]
            }
        ]
    }
})

service = FilterService()
result = service.filter(
    policy, "app", "doc-14",
    "John Smith shared confidential data with Jane Doe about the proprietary algorithm."
)
print(result.filtered_text)
# {{{REDACTED-dictionary}}} shared {{{REDACTED-dictionary}}} data with {{{REDACTED-dictionary}}} about the {{{REDACTED-dictionary}}} algorithm.

Use conditions to filter selectively

Apply different strategies based on the matched value using condition expressions.

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

policy = Policy.from_dict({
    "name": "conditional-filtering",
    "identifiers": {
        "phoneNumber": {
            "phoneNumberFilterStrategies": [
                # Redact phone numbers starting with 555 (test numbers)
                {"strategy": "REDACT", "condition": 'token startswith "555"'},
                # Mask all other phone numbers
                {"strategy": "MASK"}
            ]
        }
    }
})

service = FilterService()
result = service.filter(
    policy, "app", "doc-15",
    "Test: 555-123-4567, Real: 800-867-5309"
)
print(result.filtered_text)
# Test: {{{REDACTED-phone-number}}}, Real: ***-***-****

Maintain referential integrity across documents

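Use contexts to ensure the same PII value gets the same replacement across multiple documents. The context is the second argument to service.filter ("patient-123" in the example below); documents filtered with the same context name share replacements.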

from phileas.policy.policy import Policy
from phileas.services.filter_service import FilterService

policy = Policy.from_dict({
    "name": "context-demo",
    "identifiers": {
        "emailAddress": {
            "emailAddressFilterStrategies": [{"strategy": "HASH_SHA256_REPLACE"}]
        }
    }
})

service = FilterService()

# Filter multiple documents in the same context
doc1 = service.filter(
    policy, "patient-123", "note-1",
    "Patient emailed from john@example.com"
)
doc2 = service.filter(
    policy, "patient-123", "note-2",
    "Follow-up: john@example.com responded"
)

# The hash will be identical in both documents
print(doc1.filtered_text)
# Patient emailed from 5bb8a5cbf6...

print(doc2.filtered_text)
# Follow-up: 5bb8a5cbf6... responded

# Different context = different hash
doc3 = service.filter(
    policy, "patient-456", "note-1",
    "Patient emailed from john@example.com"
)
# Hash will be different in patient-456 context