Skip to content

Grammar

This page is generated from the PhiSQL spec artifacts for v1.0. Do not edit it by hand; change the artifact and rebuild.

PhiSQL's grammar is defined in two equivalent forms. The ANTLR4 grammar is the executable normative reference that the reference implementation generates its parser from. The ISO 14977 EBNF below is a tool-independent presentation, cross-validated against the ANTLR4 grammar.

Keywords and entity-type identifiers are case-insensitive. User-defined names (policy names, dictionary names, custom-identifier classifications) are case-sensitive.

Production rules

Rule Defined in
document EBNF
statement EBNF
policy decl EBNF
configure stmt EBNF
setting list EBNF
setting EBNF
setting key EBNF
setting value EBNF
object value EBNF
array value EBNF
options clause EBNF
redact stmt EBNF
deidentify stmt EBNF
entity assignment EBNF
ignore stmt EBNF
define identifier stmt EBNF
define dictionary stmt EBNF
define section stmt EBNF
detect stmt EBNF
discovery stmt EBNF
in clause EBNF
where discovery EBNF
discovery predicate EBNF
projection list EBNF
projection EBNF
aggregate EBNF
column ref EBNF
findings ref EBNF
group by clause EBNF
limit clause EBNF
entity list EBNF
entity type EBNF
strategy expr EBNF
strategy name EBNF
strategy args EBNF
named arg EBNF
predicate EBNF
confidence predicate EBNF
compare op EBNF
string list EBNF
literal EBNF
id EBNF
string literal EBNF
string char EBNF
string escape EBNF
numeric literal EBNF
boolean literal EBNF
letter EBNF
digit EBNF
line comment EBNF
block comment EBNF
whitespace EBNF

Full grammar

document

document         = { statement , ";" } ;

statement

statement        = policy decl
                 | configure stmt
                 | redact stmt
                 | deidentify stmt
                 | ignore stmt
                 | define identifier stmt
                 | define dictionary stmt
                 | define section stmt
                 | detect stmt
                 | discovery stmt ;

policy decl

policy decl      = "POLICY" , id , [ "DESCRIPTION" , string literal ] ;

configure stmt

configure stmt   = "CONFIGURE"
                 , ( "CRYPTO" , "KEY" , "FROM" , "ENV" , string literal
                   | "FPE" , "KEY" , "FROM" , "ENV" , string literal
                         , "TWEAK" , "FROM" , "ENV" , string literal
                   | ( "SPLITTING" | "PDF" | "POSTFILTERS" | "ANALYSIS" )
                         , "(" , setting list , ")"
                   | "GRAPHICAL" , "BOX" , "(" , setting list , ")" ) ;

setting list

setting list     = setting , { "," , setting } ;

setting

setting          = setting key , "=" , setting value ;

setting key

setting key      = id | string literal ;

setting value

setting value    = literal | object value | array value ;

object value

object value     = "(" , setting list , ")" ;

array value

array value      = "[" , [ setting value , { "," , setting value } ] , "]" ;

options clause

options clause   = "OPTIONS" , "(" , setting list , ")" ;

redact stmt

redact stmt      = "REDACT" , entity list
                 , [ "WITH" , strategy expr ]
                 , [ "WHERE" , predicate ]
                 , [ options clause ] ;

deidentify stmt

deidentify stmt  = "DEIDENTIFY" , entity assignment
                 , { "," , entity assignment } ;

entity assignment

entity assignment = entity type , "AS" , strategy expr , [ options clause ] ;

ignore stmt

ignore stmt      = "IGNORE"
                 , ( "TERMS" , string list
                   | "PATTERN" , string literal )
                 , [ "FOR" , entity list ]
                 , [ options clause ] ;

define identifier stmt

define identifier stmt
                 = "DEFINE" , "IDENTIFIER" , string literal
                 , "MATCHING" , string literal
                 , [ "GROUP" , numeric literal ]
                 , [ "CASE" , ( "SENSITIVE" | "INSENSITIVE" ) ]
                 , "WITH" , strategy expr
                 , [ "WHERE" , predicate ]
                 , [ options clause ] ;

define dictionary stmt

define dictionary stmt
                 = "DEFINE" , "DICTIONARY" , string literal
                 , "TERMS" , string list
                 , [ "FUZZY" , [ "SENSITIVITY" , id ] ]
                 , [ "CAPITALIZED" ]
                 , "WITH" , strategy expr
                 , [ options clause ] ;

define section stmt

define section stmt
                 = "DEFINE" , "SECTION"
                 , "START" , string literal
                 , "END" , string literal
                 , "WITH" , strategy expr
                 , [ options clause ] ;

detect stmt

detect stmt      = "DETECT" , "PHEYE"
                 , [ "LABELS" , string list ]
                 , [ "ENDPOINT" , string literal ]
                 , [ "MODEL" , string literal ]
                 , "WITH" , strategy expr
                 , [ "WHERE" , predicate ]
                 , [ options clause ] ;

discovery stmt

discovery stmt   = ( "FIND" , "PII" , in clause , [ where discovery ] )
                 | ( "DISCOVER" , "ENTITIES" , in clause , [ where discovery ] )
                 | ( "SCAN" , in clause , [ where discovery ] )
                 | ( "SELECT" , projection list , "FROM" , findings ref
                   , [ where discovery ]
                   , [ group by clause ]
                   , [ limit clause ] ) ;

in clause

in clause        = "IN" , string literal ;

where discovery

where discovery  = "WHERE" , discovery predicate ;

discovery predicate

discovery predicate
                 = ( column ref , "IN" , string list )
                 | ( column ref , compare op , ( string literal | numeric literal | boolean literal ) )
                 | ( "(" , discovery predicate , ")" )
                 | ( discovery predicate , ( "AND" | "OR" ) , discovery predicate ) ;

projection list

projection list  = projection , { "," , projection } ;

projection

projection       = "*"
                 | aggregate
                 | column ref ;

aggregate

aggregate        = ( "COUNT" | "AVG" | "SUM" | "MIN" | "MAX" )
                 , "(" , ( "*" | column ref ) , ")" ;

column ref

column ref       = id | "CONFIDENCE" ;

findings ref

findings ref     = [ id , "." ] , id ;

group by clause

group by clause  = "GROUP" , "BY" , column ref , { "," , column ref } ;

limit clause

limit clause     = "LIMIT" , numeric literal ;

entity list

entity list      = entity type , { "," , entity type } ;

entity type

entity type      = id
                 | "IDENTIFIER" , "(" , string literal , ")" ;

strategy expr

strategy expr    = strategy name , [ "(" , strategy args , ")" ] ;

strategy name

strategy name    = "MASK"
                 | "REDACT"
                 | "ENCRYPT"
                 | "FPE_ENCRYPT"
                 | "HASH_SHA256"
                 | "RANDOM_REPLACE"
                 | "STATIC_REPLACE"
                 | "LAST_4"
                 | "TRUNCATE"
                 | "TRUNCATE_TO_YEAR"
                 | "SHIFT"
                 | "RELATIVE"
                 | "ABBREVIATE" ;

strategy args

strategy args    = named arg , { "," , named arg } ;

named arg

named arg        = id , "=" , setting value ;

predicate

predicate        = confidence predicate
                 | "(" , predicate , ")"
                 | predicate , ( "AND" | "OR" ) , predicate ;

confidence predicate

confidence predicate = "CONFIDENCE" , compare op , numeric literal ;

compare op

compare op       = ">" | ">=" | "<" | "<=" | "=" ;

string list

string list      = "(" , string literal , { "," , string literal } , ")" ;

literal

literal          = string literal | numeric literal | boolean literal | id ;

id

id               = letter , { letter | digit | "_" } ;

string literal

string literal   = "'" , { string char | string escape } , "'" ;

string char

string char      = ? any character except "'" or "\" or newline ? ;

string escape

string escape    = "\" , ? any character ? ;

numeric literal

numeric literal  = [ "-" ] , digit , { digit }
                 , [ "." , digit , { digit } ] ;

boolean literal

boolean literal  = "TRUE" | "FALSE" ;

letter

letter           = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J"
                 | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T"
                 | "U" | "V" | "W" | "X" | "Y" | "Z"
                 | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j"
                 | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t"
                 | "u" | "v" | "w" | "x" | "y" | "z"
                 | "_" ;

digit

digit            = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;

line comment

line comment     = "--" , { ? any character except newline ? } ;

block comment

block comment    = "/*" , { ? any character, provided the sequence "*/" does not occur ? } , "*/" ;

whitespace

whitespace       = ? space, tab, carriage return, or newline ? ;