(* ========================================================================= *) (* Lexical analyzer, type and preterm parsers. *) (* *) (* John Harrison, University of Cambridge Computer Laboratory *) (* *) (* (c) Copyright, University of Cambridge 1998 *) (* (c) Copyright, John Harrison 1998-2007 *) (* ========================================================================= *) (* Slightly more verbose error messages added by thales, July 2010 *) module Parser_verbose = struct let string_of_lexcode = function Ident s -> s |Resword s -> s;; let string_of_lexcodel r = itlist (fun a b -> string_of_lexcode a^" "^b) r "";; let err_lex_rest prs i = try prs i with Failure s -> failwith (s^ "\n unread tokens:\n"^string_of_lexcodel i);; let parse_pretype_verbose = let btyop n n' x y = Ptycon(n,[x;y]) and mk_apptype = function ([s],[]) -> s | (tys,[c]) -> Ptycon(c,tys) | _ -> failwith "Bad type construction" and type_atom input = match input with (Ident s)::rest -> (try pretype_of_type(assoc s (type_abbrevs())) with Failure _ -> if try get_type_arity s = 0 with Failure _ -> false then Ptycon(s,[]) else Utv(s)),rest | _ -> raise Noparse and type_constructor input = match input with (Ident s)::rest -> if try get_type_arity s > 0 with Failure _ -> false then s,rest else raise Noparse | _ -> raise Noparse in let rec pretype i = rightbin sumtype (a (Resword "->")) (btyop "fun") "proper use of type operator (->)" i and sumtype i = rightbin prodtype (a (Ident "+")) (btyop "sum") "proper use of type operator (+)" i and prodtype i = rightbin carttype (a (Ident "#")) (btyop "prod") "proper use of type operator (#)" i and carttype i = leftbin apptype (a (Ident "^")) (btyop "cart") " proper use of type operator (^) " i and apptype i = (atomictypes ++ (type_constructor >> (fun x -> [x]) || nothing) >> mk_apptype) i and atomictypes i = (((a (Resword "(")) ++ typelist ++ a (Resword ")") >> (snd o fst)) || type_atom >> (fun x -> [x])) i and typelist i = listof pretype (a (Ident ",")) "comma separated list for type constructor" i in pretype;; let parse_preterm_verbose = let rec pfrees ptm acc = match ptm with Varp(v,pty) -> if v = "" & pty = dpty then acc else if can get_const_type v or can num_of_string v or exists (fun (w,_) -> v = w) (!the_interface) then acc else insert ptm acc | Constp(_,_) -> acc | Combp(p1,p2) -> pfrees p1 (pfrees p2 acc) | Absp(p1,p2) -> subtract (pfrees p2 acc) (pfrees p1 []) | Typing(p,_) -> pfrees p acc in let pdest_eq (Combp(Combp(Varp(("="|"<=>"),_),l),r)) = l,r in let pmk_let (letbindings,body) = let vars,tms = unzip (map pdest_eq letbindings) in let lend = Combp(Varp("LET_END",dpty),body) in let abs = itlist (fun v t -> Absp(v,t)) vars lend in let labs = Combp(Varp("LET",dpty),abs) in rev_itlist (fun x f -> Combp(f,x)) tms labs in let pmk_vbinder(n,v,bod) = if n = "\\" then Absp(v,bod) else Combp(Varp(n,dpty),Absp(v,bod)) in let pmk_binder(n,vs,bod) = itlist (fun v b -> pmk_vbinder(n,v,b)) vs bod in let pmk_set_enum ptms = itlist (fun x t -> Combp(Combp(Varp("INSERT",dpty),x),t)) ptms (Varp("EMPTY",dpty)) in let pgenvar = let gcounter = ref 0 in fun () -> let count = !gcounter in (gcounter := count + 1; Varp("GEN%PVAR%"^(string_of_int count),dpty)) in let pmk_exists(v,ptm) = Combp(Varp("?",dpty),Absp(v,ptm)) in let pmk_list els = itlist (fun x y -> Combp(Combp(Varp("CONS",dpty),x),y)) els (Varp("NIL",dpty)) in let pmk_bool = let tt = Varp("T",dpty) and ff = Varp("F",dpty) in fun b -> if b then tt else ff in let pmk_char c = let lis = map (fun i -> pmk_bool((c / (1 lsl i)) mod 2 = 1)) (0--7) in itlist (fun x y -> Combp(y,x)) lis (Varp("ASCII",dpty)) in let pmk_string s = let ns = map (fun i -> Char.code(String.get s i)) (0--(String.length s - 1)) in pmk_list(map pmk_char ns) in let pmk_setcompr (fabs,bvs,babs) = let v = pgenvar() in let bod = itlist (curry pmk_exists) bvs (Combp(Combp(Combp(Varp("SETSPEC",dpty),v),babs),fabs)) in Combp(Varp("GSPEC",dpty),Absp(v,bod)) in let pmk_setabs (fabs,babs) = let evs = let fvs = pfrees fabs [] and bvs = pfrees babs [] in if length fvs <= 1 or bvs = [] then fvs else intersect fvs bvs in pmk_setcompr (fabs,evs,babs) in let rec mk_precedence infxs prs inp = match infxs with (s,(p,at))::_ -> let topins,rest = partition (fun (s',pat') -> pat' = (p,at)) infxs in (if at = "right" then rightbin else leftbin) (mk_precedence rest prs) (end_itlist (||) (map (fun (s,_) -> a (Ident s)) topins)) (fun (Ident op) x y -> Combp(Combp(Varp(op,dpty),x),y)) ("preterm after binary operator ("^s^")") inp | _ -> prs inp in let pmk_geq s t = Combp(Combp(Varp("GEQ",dpty),s),t) in let pmk_pattern ((pat,guards),res) = let x = pgenvar() and y = pgenvar() in let vs = pfrees pat [] and bod = if guards = [] then Combp(Combp(Varp("_UNGUARDED_PATTERN",dpty),pmk_geq pat x), pmk_geq res y) else Combp(Combp(Combp(Varp("_GUARDED_PATTERN",dpty),pmk_geq pat x), hd guards), pmk_geq res y) in Absp(x,Absp(y,itlist (curry pmk_exists) vs bod)) in let pretype = parse_pretype_verbose and string inp = match inp with Ident s::rst when String.length s >= 2 & String.sub s 0 1 = "\"" & String.sub s (String.length s - 1) 1 = "\"" -> String.sub s 1 (String.length s - 2),rst | _ -> raise Noparse and singleton1 x = [x] and lmk_ite (((((_,b),_),l),_),r) = Combp(Combp(Combp(Varp("COND",dpty),b),l),r) and lmk_typed = function (p,[]) -> p | (p,[ty]) -> Typing(p,ty) | _ -> fail() and lmk_let (((_,bnds),_),ptm) = pmk_let (bnds,ptm) and lmk_binder ((((s,h),t),_),p) = pmk_binder(s,h::t,p) and lmk_setenum(l,_) = pmk_set_enum l and lmk_setabs(((l,_),r),_) = pmk_setabs(l,r) and lmk_setcompr(((((f,_),vs),_),b),_) = pmk_setcompr(f,pfrees vs [],b) and lmk_decimal ((_,l0),ropt) = let l,r = if ropt = [] then l0,"1" else let r0 = hd ropt in let n_l = num_of_string l0 and n_r = num_of_string r0 in let n_d = power_num (Int 10) (Int (String.length r0)) in let n_n = n_l */ n_d +/ n_r in string_of_num n_n,string_of_num n_d in Combp(Combp(Varp("DECIMAL",dpty),Varp(l,dpty)),Varp(r,dpty)) and lmk_univ((_,pty),_) = Typing(Varp("UNIV",dpty),Ptycon("fun",[pty;Ptycon("bool",[])])) and any_identifier = function ((Ident s):: rest) -> s,rest | _ -> raise Noparse and identifier = function ((Ident s):: rest) -> if can get_infix_status s or is_prefix s or parses_as_binder s then raise Noparse else s,rest | _ -> raise Noparse and binder = function ((Ident s):: rest) -> if parses_as_binder s then s,rest else raise Noparse | _ -> raise Noparse and pre_fix = function ((Ident s):: rest) -> if is_prefix s then s,rest else raise Noparse | _ -> raise Noparse in let rec preterm i = mk_precedence (infixes()) (typed_appl_preterm) i and nocommapreterm i = let infs = filter (fun (s,_) -> s <> ",") (infixes()) in mk_precedence infs typed_appl_preterm i and typed_appl_preterm i = (appl_preterm ++ possibly (a (Resword ":") ++ pretype >> snd) >> lmk_typed) i and appl_preterm i = (pre_fix ++ appl_preterm >> (fun (x,y) -> Combp(Varp(x,dpty),y)) || binder_preterm ++ many binder_preterm >> (fun (h,t) -> itlist (fun x y -> Combp(y,x)) (rev t) h)) i and binder_preterm i = (a (Resword "let") ++ leftbin (preterm >> singleton1) (a (Resword "and")) (K (@)) "binding" ++ a (Resword "in") ++ preterm >> lmk_let || binder ++ typed_apreterm ++ many typed_apreterm ++ a (Resword ".") ++ preterm >> lmk_binder || atomic_preterm) i and typed_apreterm i = (atomic_preterm ++ possibly (a (Resword ":") ++ pretype >> snd) >> lmk_typed) i and atomic_preterm i = (try_user_parser || (a (Resword "(") ++ a (Resword ":")) ++ pretype ++ a (Resword ")") >> lmk_univ || (a (Resword "(") ++ a (Resword ":")) ++ pretype >>(fun _ -> failwith "closing right parenthesis on universe expected") || (a (Resword "(") ++ a (Resword ":")) >>(fun _ -> failwith "pretype in universe construction expected") || string >> pmk_string || a (Resword "(") ++ (any_identifier >> (fun s -> Varp(s,dpty))) ++ a (Resword ")") >> (snd o fst) || a (Resword "(") ++ preterm ++ a (Resword ")") >> (snd o fst) (* || a (Resword "(") ++ preterm >> (fun _ -> failwith "closing right parenthesis expected") || a (Resword "(") >> (fun _ -> failwith "preterm following left parenthesis expected") *) || a (Resword "if") ++ preterm ++ a (Resword "then") ++ preterm ++ a (Resword "else") ++ preterm >> lmk_ite || (a (Resword "if") ) ++ preterm ++ a (Resword "then") ++ preterm ++ a (Resword "else") >> (fun _ -> failwith "malformed else clause") || (a (Resword "if") ) ++ preterm ++ a (Resword "then") ++ preterm >> (fun _ -> failwith "missing else following then clause") || (a (Resword "if") ) ++ preterm ++ a (Resword "then") >> (fun _ -> failwith "malformed then clause in if-then-else statement") || (a (Resword "if") ) ++ preterm >> (fun _ -> failwith "missing 'then' reserved word if-then-else statement") || (a (Resword "if") ) >> (fun _ -> failwith "malformed if-then-else") || a (Resword "[") ++ elistof preterm (a (Resword ";")) "semicolon separated list of preterms" ++ a (Resword "]") >> (pmk_list o snd o fst) || a (Resword "[") ++ elistof preterm (a (Resword ";")) "semicolon separated list of preterms" >> (fun _ -> failwith "closing square bracket on list expected") || a (Resword "{") ++ (elistof nocommapreterm (a (Ident ",")) "comma separated list of preterms" ++ a (Resword "}") >> lmk_setenum || preterm ++ a (Resword "|") ++ preterm ++ a (Resword "}") >> lmk_setabs || preterm ++ a (Resword "|") ++ preterm ++ a (Resword "|") ++ preterm ++ a (Resword "}") >> lmk_setcompr) >> snd || a (Resword "{") >> (fun _ -> failwith "malformed set {}") || a (Resword "match") ++ preterm ++ a (Resword "with") ++ clauses >> (fun (((_,e),_),c) -> Combp(Combp(Varp("_MATCH",dpty),e),c)) || a (Resword "match") >> (fun _ -> failwith "malformed match-with statement") || a (Resword "function") ++ clauses >> (fun (_,c) -> Combp(Varp("_FUNCTION",dpty),c)) || a (Resword "function") >> (fun _ -> failwith "malformed function and pattern clauses") || a (Ident "#") ++ identifier ++ possibly (a (Resword ".") ++ identifier >> snd) >> lmk_decimal || a (Ident "#") >> (fun _ -> failwith "malformed numerical # identifier") || identifier >> (fun s -> Varp(s,dpty))) i and pattern i = (preterm ++ possibly (a (Resword "when") ++ preterm >> snd)) i and clause i = ((pattern ++ (a (Resword "->") ++ preterm >> snd)) >> pmk_pattern) i and clauses i = ((possibly (a (Resword "|")) ++ listof clause (a (Resword "|")) "pattern-match clause" >> snd) >> end_itlist (fun s t -> Combp(Combp(Varp("_SEQPATTERN",dpty),s),t))) i in (fun inp -> match inp with [Ident s] -> Varp(s,dpty),[] | _ -> preterm inp);; end;;