Practice (K)

// shift-reduce parser
// stevan apter, 11.01


//////////////////  
// file parse.k //
//////////////////

// precedence parser
// stevan apter, 11.01
// tom niemann's parsing paper:  http://epaperpress.com

/ operator table:

Z:_ci 255                               / end-of-string
S:":"                                   / assignment
G:"()"                                  / grouping
L:","                                   / list

O:(,S                                   / assignment
   ("&";"and";"|";"or")                 / and, or
   ("~";"=";"<";">";"<>";"<=";">=")     / relations
   "+-"                                 / plus, minus
   "*/"                                 / times, divide
   ,"^"                                 / power
   ("NEG";"neg";"NOT";"not")            / negate, not
   ,"if"                                / ternary functions
   ("min";"max")                        / binary functions
   ("sum";"avg")                        / unary functions
   ,L                                   / arg separator
   ,G 0                                 / left paren
   ,G 1                                 / right paren
   ,Z)                                  / end of string


/ type table:

R:+((,S                           ;,/(0 1 0;0 2 0;0 4 0;0 6 0)+\:/:(1 0 1;2 0 2;4 0 4))
    (,"~"                         ;(1 1 1;1 1 2;1 1 4;1 2 1;1 2 2;1 2 4;1 4 1;1 4 2;1 4 4))
    (("+";"-";"*";"min";"max")    ;(1 1 1;2 2 1;2 1 2;2 2 2))
    ("/^"                         ;(2 1 1;2 2 1;2 1 2;2 2 2))
    (,"="                         ;(1 1 1;1 2 1;1 1 2;1 2 2;1 4 4))
    (("NEG";"neg")                ;(1 1;2 2))
    (("NOT";"not")                ;,1 1)
    (("&";"and";"|";"or")         ;,1 1 1)
    (("<";">";"<>";"<=";">=")     ;(1 1 1;1 1 2;1 2 1;1 2 2))
    (,"sum"                       ;(1 1;2 2))
    (,"avg"                       ;(2 1;2 2))
    (,"if"                        ;(1 1 1 1;2 1 2 2;4 1 4 4)))

V:2 2 2 2 2 2 1 3 2 1 -1 0 -2 0 0       / valence

/ shift-reduce table:

/   :&=+*^-321,()Zc : s -> right assoc, r -> left assoc

P:("rsssssssssrsrrc"    / :
   "rrssssssssrsrrc"    / & |
   "rrrsssssssrsrrc"    / = < > <> <= >=
   "rrrrssssssrsrrc"    / + -
   "rrrrrsssssrsrrc"    / * /
   "rrrrrrssssrsrrc"    / ^
   "rrrrrrssssrsrrc"    / unary - ~
   "rrrrrrrrssrsrrc"    / 3
   "rrrrrrrrrsrsrrc"    / 2
   "rrrrrrrrrrrsrrc"    / 1
   "rrrrrrrrrrrrrrc"    / ,
   "sssssssssssssRc"    / (
   "rrrrrrrrrrrOrrc"    / )
   "ssssssssssssLac")   / Z

/ L -> K translation:

trans:{(K[1],,x)K[0]?x}                 / translate to K

K:+(("<>";"(~=)")                       / L -> K
    ("<=";"(~>)")
    (">=";"(~<)")
    ("/";"%")
    ("NOT";"~:")
    ("not";"~:")
    ("NEG";"-:")
    ("neg";"-:")
    ("and";"&")
    ("or";"|")
    ("min";"&")
    ("max";"|")
    ("if";"IF")
    ("sum";"SUM")
    ("avg";"AVG")
    (S;"SET"))

/ tokenizing parameters:

W:("<>";"<=";">=")                                             / 2 character operators
U:(("-";"NEG");("~";"NOT"))                                    / ambivalent unaries

B:" "                                                          / blank
D:"."                                                          / decimal point
N:"0123456789"                                                 / numerics
A:"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVXYZ_"       / alphas
Q:"\""                                                         / quotation
F:?(,//O)_dvl Z,A                                              / symbolics
C:(B;A;D;N;Q;F)                                                / token classes

state:{{.[x;(;y);:;z]}/[((#x),256)#-1;y;+x]}                   / construct state table

/        B A D N Q O            / token classes
M:state[(1 2 2 4 7 9            / 0 start
         1 2 2 4 7 9            / 1 blank
         1 3 3 3 7 9            / 2 name1
         1 3 3 3 7 9            / 3 name2
         1 2 5 4 7 9            / 4 num1

         1 2 2 6 7 9            / 5 decimal
         1 2 2 6 7 9            / 6 num2
         7 7 7 7 8 7            / 7 open quote
         1 2 2 4 7 9            / 8 close quote
         1 2 2 4 7 9)]_ic C     / 9 operator

I:0 1 3 6 8                     / state endpoints

/ tokenize:

token:{((()lit/()one/fsm chr"",x)two/W)una/U}                / tokenize
fsm:{bla cut[x;0 M\_ic x]}                                   / scan input
bla:{x _di&B=*:'x}                                           / eliminate blanks
cut:{{:[1=#x;*x;x]}'(&(~=)':I _binl y)_ x}                   / cut into vectors and atoms
one:{x,:[&/y _lin F;y;,y]}                                   / operator strings -> atoms
lit:{x,,:[~@y;y;y _in A,D,N;,y;y]}                           / "a" -> ,"a"
two:{@[x;i;:[;y]]_di -1+i:&0{&/x~'(z;y)}[y]':x}              / 2 char operators -> 1
una:{@[x;&{((z _in,/O)&~z~G 1)&y~x}[*y]':"(",x;:[;y 1]]}     / e.g. - x -> NEG x (U)
chr:{:[~&/b:x _lin,/C;ERROR[(1+b?0)#x;"character error"];x]} / character error

/ parse:

parse:{:[#x;**|act/("";x,,Z;,Z;"");x]}                       / parse
act:{t:P . class'x[2 1;0];trace[t]. 2#x;H[`$t]. x}           / action step
T:15;trace:{[t;j;i]if[T;`0:,t,(T$j)," | ",(-T)$1_,/B,'i]}    / trace
class:{(|/'x _in/:O)?1}                                      / syntax class
cons:{:[&/x _lin D,N;(),x;Q~*x;"`",x;("GET";"`\"",x,"\"")]}  / constant: literal or not
H.a:{[j;i;o;v]:[1=#v;(j;i;o;v);ERROR[j;"syntax error"]]}     / accept
H.c:{[j;i;o;v](j,B,*i;1_ i;o;(,cons i 0),v)}                 / constant -> v-stack
H.s:{[j;i;o;v](j,B,*i;1_ i;(,*i),o;v)}                       / shift operator -> o-stack
H.r:{[j;i;o;v]:[0<n;rp;rn][j;i;o;v;n:V class o 0]}           / reduce v-stack by *o
H.O:{[j;i;o;v]ERROR[j;"missing operator"]}                   / missing operator
H.L:{[j;i;o;v]ERROR[j;"missing ("]}                          / missing (
H.R:{[j;i;o;v]ERROR[j;"missing )"]}                          / missing )
rn:{[j;i;o;v;n](j;i;(-n)_ o;v)}                              / reduce without result
rp:{[j;i;o;v;n]val[j;v;n];(j;i;1_ o;ret[(,*o),|n#v],n _ v)}  / reduce with result
ret:{,:[("GET"~*x 1)&S~*x;.[x;1 0;:;"RET"];x]}               / GET -> RET in SET
val:{[j;v;n]if[n>#v;ERROR[j;"missing argument"]]}            / check missing argument

/ errors:

ERROR:{`0:,x;'((-1+#x)#""),"^ ",y}                           / signal error

/ type:

type:{:[~#x;_n;4:x;tlit x;tvar x;TYPE@. x 1;texp[x;topr@*x;_f'1_ x]]}   / type check
tvar:{x[0]_in("GET";"LET")}                                  / variable
tlit:{:["`"=*x;4;D _in x;2;1]}                               / lit: char, float, int
topr:{R[1;(x _in/:*R)?1]}                                    / type of operator
texp:{:[|/b:(1_'y)~\:z;*y b?1;ERROR[string x;"type"]]}       / type of expression
TYPE:{__abs 4:. x}                                           / type name

/ evaluate:

eval:{:[~#x;x;~4:x;_f[trans x 0]._f'1_ x;"GET"~x;GET;"SET"~x;SET;. x]} / simple evaluation
/ SET:{y}                                                    / comment
/ SET:{`0:,$x;y}                                             / trace
SET:{.[x;();:;y];y}                                          / set value
RET:{x}                                                      / return value
GET:{. x}                                                    / get value
SUM:+/;AVG:{(+/x)%#x}                                        / aggregation
IF:{:[x;y;z]}                                                / conditional

/ references:

refs:{:[4:y;();~x~*y;?,/_f[x]'1_ y;. y 1]}                   / unique list of refs
gets:refs["GET"]                                             / unique list of gets
sets:refs["SET"]                                             / unique list of sets

/ miscellaneous:

string:{:[~#x;"";4:x;x;trans[*x],1!"][",1_,/(";",_f@)'1_ x]}      / construct k-executable
run:*|(type;eval)@\:parse token@                                  / typecheck, evaluate
int:{while[~"\\\\"~a:{`0:"> ";0:`}[];`0:,:[*r:.[run;a;:];r 1;5:r 1]]}   
                                                           / mini-interpreter - \\ to exit

//////////////////
// file table.k //
//////////////////

\d L
\l parse
\d ^
/ define table and field types
Table[`f`g`h]:(10#`one`two`three`four`five;!10;1.*10 _draw 100)
Type[`f`g`h]:4 1 2
  
/ define GET and TYPE functions for this app
                                                  
L.GET:{Table x}
L.TYPE:{Type x}

L.run "g+3"                     / direct evaluation

/ OR:

L.T:0                           / turn off trace
p:L.parse L.token "g+3"         / parse
L.type p                        / type-check p
r:L.gets p                      / unique list of columns
s:L.string p                    / stringify
`L . s                          / execute in L (`L[s] also works)