Theory Lexer

(* Author: Peter Lammich *)
section ‹Basic Lexing›

theory Lexer
  imports Parser_Combinator
begin

― ‹We start by defining a lexer›
definition "lx_ws  repeat (any char_wspace)"
abbreviation "token  gen_token lx_ws"

definition [consuming]: "tk_plus = token (exactly ''+'')"
definition [consuming]: "tk_times  token (exactly ''*'')"
definition [consuming]: "tk_minus  token (exactly ''-'')"
definition [consuming]: "tk_div  token (exactly ''/'')"
definition [consuming]: "tk_power  token (exactly ''^'')"
definition [consuming]: "tk_lparen  token (exactly ''('')"
definition [consuming]: "tk_rparen  token (exactly '')'')"

abbreviation "additive_op  
  tk_plus  return (+)
 tk_minus  return (-)"
abbreviation "multiplicative_op 
  tk_times  return (*)
 tk_div  return (div)"
abbreviation "power_op 
  tk_power  return (λa b. a^nat b)"

abbreviation "lx_digit'  lx_digit with (λd. nat_of_char d - nat_of_char CHR ''0'')"

― ‹We convert the string to a number while parsing, using a parameterized parser.
   NB: Allows leading zeroes›
fun lx_nat_aux :: "nat  (char,nat) parser" where
  " lx_nat_aux acc ::= do { d  lx_digit'; lx_nat_aux (10*acc + d) }
   return acc"

definition [consuming]: "lx_nat  lx_digit'  lx_nat_aux"
definition [consuming]: "lx_int  exactly ''-'' *-- lx_nat with uminus o int  lx_nat with int"

definition [consuming]: "tk_int  token lx_int"

end