1 (* ========================================================================= *)
2 (* Lexical analyzer, type and preterm parsers. *)
4 (* John Harrison, University of Cambridge Computer Laboratory *)
6 (* (c) Copyright, University of Cambridge 1998 *)
7 (* (c) Copyright, John Harrison 1998-2007 *)
8 (* ========================================================================= *)
12 (* ------------------------------------------------------------------------- *)
13 (* Need to have this now for set enums, since "," isn't a reserved word. *)
14 (* ------------------------------------------------------------------------- *)
16 parse_as_infix (",",(14,"right"));;
18 (* ------------------------------------------------------------------------- *)
19 (* Basic parser combinators. *)
20 (* ------------------------------------------------------------------------- *)
24 let (||) parser1 parser2 input =
26 with Noparse -> parser2 input;;
28 let (++) parser1 parser2 input =
29 let result1,rest1 = parser1 input in
30 let result2,rest2 = parser2 rest1 in
31 (result1,result2),rest2;;
33 let rec many prs input =
34 try let result,next = prs input in
35 let results,rest = many prs next in
36 (result::results),rest
37 with Noparse -> [],input;;
39 let (>>) prs treatment input =
40 let result,rest = prs input in
41 treatment(result),rest;;
43 let fix err prs input =
45 with Noparse -> failwith (err ^ " expected");;
47 let rec listof prs sep err =
48 prs ++ many (sep ++ fix err prs >> snd) >> (fun (h,t) -> h::t);;
50 let nothing input = [],input;;
52 let elistof prs sep err =
53 listof prs sep err || nothing;;
55 let leftbin prs sep cons err =
56 prs ++ many (sep ++ fix err prs) >>
57 (fun (x,opxs) -> let ops,xs = unzip opxs in
58 itlist2 (fun op y x -> cons op x y) (rev ops) (rev xs) x);;
60 let rightbin prs sep cons err =
61 prs ++ many (sep ++ fix err prs) >>
62 (fun (x,opxs) -> if opxs = [] then x else
63 let ops,xs = unzip opxs in
64 itlist2 cons ops (x::butlast xs) (last xs));;
66 let possibly prs input =
67 try let x,rest = prs input in [x],rest
68 with Noparse -> [],input;;
73 | (h::t) -> if p h then (h,t) else raise Noparse;;
75 let a tok = some (fun item -> item = tok);;
77 let rec atleast n prs i =
78 (if n <= 0 then many prs
79 else prs ++ atleast (n - 1) prs >> (fun (h,t) -> h::t)) i;;
82 if input = [] then 0,input else failwith "Unparsed input";;
84 (* ------------------------------------------------------------------------- *)
85 (* The basic lexical classes: identifiers, strings and reserved words. *)
86 (* ------------------------------------------------------------------------- *)
88 type lexcode = Ident of string
91 (* ------------------------------------------------------------------------- *)
92 (* Lexical analyzer. Apart from some special bracket symbols, each *)
93 (* identifier is made up of the longest string of alphanumerics or *)
94 (* the longest string of symbolics. *)
95 (* ------------------------------------------------------------------------- *)
97 reserve_words ["//"];;
99 let comment_token = ref (Resword "//");;
102 let collect (h,t) = end_itlist (^) (h::t) in
106 if is_reserved_word n then Resword(n) else tok
108 let stringof p = atleast 1 p >> end_itlist (^) in
109 let simple_ident = stringof(some isalnum) || stringof(some issymb) in
110 let undertail = stringof (a "_") ++ possibly simple_ident >> collect in
111 let ident = (undertail || simple_ident) ++ many undertail >> collect in
112 let septok = stringof(some issep) in
115 "\\"::rst -> "\\",rst
116 | "\""::rst -> "\"",rst
117 | "\'"::rst -> "\'",rst
118 | "n"::rst -> "\n",rst
119 | "r"::rst -> "\r",rst
120 | "t"::rst -> "\t",rst
121 | "b"::rst -> "\b",rst
122 | " "::rst -> " ",rst
124 String.make 1 (Char.chr(int_of_string("0x"^h^l))),rst
125 | a::b::c::rst when forall isnum [a;b;c] ->
126 String.make 1 (Char.chr(int_of_string(a^b^c))),rst
127 | _ -> failwith "lex:unrecognized OCaml-style escape in string" in
129 some (fun i -> i <> "\\" & i <> "\"")
130 || (a "\\" ++ escapecode >> snd) in
131 let string = a "\"" ++ many stringchar ++ a "\"" >>
132 (fun ((_,s),_) -> "\""^implode s^"\"") in
133 let rawtoken = (string || some isbra || septok || ident) >>
134 (fun x -> Ident x) in
135 let simptoken = many (some isspace) ++ rawtoken >> (reserve o snd) in
137 try let (t,rst) = simptoken i in
138 if t = !comment_token then
139 (many (fun i -> if i <> [] & hd i <> "\n" then 1,tl i
140 else raise Noparse) ++ tokens >> snd) rst
142 let toks,rst1 = tokens rst in t::toks,rst1
143 with Noparse -> [],i in
144 fst o (tokens ++ many (some isspace) ++ finished >> (fst o fst));;
146 (* ------------------------------------------------------------------------- *)
147 (* Parser for pretypes. Concrete syntax: *)
149 (* TYPE :: SUMTYPE -> TYPE *)
152 (* SUMTYPE :: PRODTYPE + SUMTYPE *)
155 (* PRODTYPE :: POWTYPE # PRODTYPE *)
158 (* POWTYPE :: APPTYPE ^ POWTYPE *)
161 (* APPTYPE :: ATOMICTYPES type-constructor [Provided arity matches] *)
162 (* | ATOMICTYPES [Provided only 1 ATOMICTYPE] *)
164 (* ATOMICTYPES :: type-constructor [Provided arity zero] *)
165 (* | type-variable *)
167 (* | ( TYPE LIST ) *)
169 (* TYPELIST :: TYPE , TYPELIST *)
172 (* Two features make this different from previous HOL type syntax: *)
174 (* o Any identifier not in use as a type constant will be parsed as a *)
175 (* type variable; a ' is not needed and a * is not allowed. *)
177 (* o Antiquotation is not supported. *)
178 (* ------------------------------------------------------------------------- *)
181 let btyop n n' x y = Ptycon(n,[x;y])
185 | (tys,[c]) -> Ptycon(c,tys)
186 | _ -> failwith "Bad type construction"
187 and type_atom input =
190 (try pretype_of_type(assoc s (type_abbrevs())) with Failure _ ->
191 if try get_type_arity s = 0 with Failure _ -> false
192 then Ptycon(s,[]) else Utv(s)),rest
194 and type_constructor input =
196 (Ident s)::rest -> if try get_type_arity s > 0 with Failure _ -> false
197 then s,rest else raise Noparse
198 | _ -> raise Noparse in
199 let rec pretype i = rightbin sumtype (a (Resword "->")) (btyop "fun") "type" i
200 and sumtype i = rightbin prodtype (a (Ident "+")) (btyop "sum") "type" i
201 and prodtype i = rightbin carttype (a (Ident "#")) (btyop "prod") "type" i
202 and carttype i = leftbin apptype (a (Ident "^")) (btyop "cart") "type" i
203 and apptype i = (atomictypes ++ (type_constructor >> (fun x -> [x])
204 || nothing) >> mk_apptype) i
206 (((a (Resword "(")) ++ typelist ++ a (Resword ")") >> (snd o fst))
207 || type_atom >> (fun x -> [x])) i
208 and typelist i = listof pretype (a (Ident ",")) "type" i in
211 (* ------------------------------------------------------------------------- *)
212 (* Hook to allow installation of user parsers. *)
213 (* ------------------------------------------------------------------------- *)
215 let install_parser,delete_parser,installed_parsers,try_user_parser =
216 let rec try_parsers ps i =
217 if ps = [] then raise Noparse else
218 try snd(hd ps) i with Noparse -> try_parsers (tl ps) i in
220 ref([]:(string*(lexcode list -> preterm * lexcode list))list) in
221 (fun dat -> parser_list := dat::(!parser_list)),
222 (fun key -> try parser_list := snd (remove (fun (key',_) -> key = key')
224 with Failure _ -> ()),
225 (fun () -> !parser_list),
226 (fun i -> try_parsers (!parser_list) i);;
228 (* ------------------------------------------------------------------------- *)
229 (* Initial preterm parsing. This uses binder and precedence/associativity/ *)
230 (* prefix status to guide parsing and preterm construction, but treats all *)
231 (* identifiers as variables. *)
233 (* PRETERM :: APPL_PRETERM binop APPL_PRETERM *)
236 (* APPL_PRETERM :: APPL_PRETERM : type *)
237 (* | APPL_PRETERM BINDER_PRETERM *)
238 (* | BINDER_PRETERM *)
240 (* BINDER_PRETERM :: binder VARSTRUCT_PRETERMS . PRETERM *)
241 (* | let PRETERM and ... and PRETERM in PRETERM *)
242 (* | ATOMIC_PRETERM *)
244 (* VARSTRUCT_PRETERMS :: TYPED_PRETERM VARSTRUCT_PRETERMS *)
245 (* | TYPED_PRETERM *)
247 (* TYPED_PRETERM :: TYPED_PRETERM : type *)
248 (* | ATOMIC_PRETERM *)
250 (* ATOMIC_PRETERM :: ( PRETERM ) *)
251 (* | if PRETERM then PRETERM else PRETERM *)
252 (* | [ PRETERM; .. ; PRETERM ] *)
253 (* | { PRETERM, .. , PRETERM } *)
254 (* | { PRETERM | PRETERM } *)
257 (* Note that arbitrary preterms are allowed as varstructs. This allows *)
258 (* more general forms of matching and considerably regularizes the syntax. *)
259 (* ------------------------------------------------------------------------- *)
262 let rec pairwise r l =
265 | h::t -> forall (r h) t & pairwise r t in
266 let rec pfrees ptm acc =
269 if v = "" & pty = dpty then acc
270 else if can get_const_type v or can num_of_string v
271 or exists (fun (w,_) -> v = w) (!the_interface) then acc
274 | Combp(p1,p2) -> pfrees p1 (pfrees p2 acc)
275 | Absp(p1,p2) -> subtract (pfrees p2 acc) (pfrees p1 [])
276 | Typing(p,_) -> pfrees p acc in
277 let pdest_eq (Combp(Combp(Varp(("="|"<=>"),_),l),r)) = l,r in
278 let pmk_let (letbindings,body) =
279 let vars,tms = unzip (map pdest_eq letbindings) in
281 (pairwise (fun s t -> intersect(pfrees s []) (pfrees t []) = []) vars))
282 "duplicate names on left of let-binding: latest is used" in
283 let lend = Combp(Varp("LET_END",dpty),body) in
284 let abs = itlist (fun v t -> Absp(v,t)) vars lend in
285 let labs = Combp(Varp("LET",dpty),abs) in
286 rev_itlist (fun x f -> Combp(f,x)) tms labs in
287 let pmk_vbinder(n,v,bod) =
288 if n = "\\" then Absp(v,bod)
289 else Combp(Varp(n,dpty),Absp(v,bod)) in
290 let pmk_binder(n,vs,bod) =
291 itlist (fun v b -> pmk_vbinder(n,v,b)) vs bod in
292 let pmk_set_enum ptms =
293 itlist (fun x t -> Combp(Combp(Varp("INSERT",dpty),x),t)) ptms
294 (Varp("EMPTY",dpty)) in
296 let gcounter = ref 0 in
297 fun () -> let count = !gcounter in
298 (gcounter := count + 1;
299 Varp("GEN%PVAR%"^(string_of_int count),dpty)) in
300 let pmk_exists(v,ptm) = Combp(Varp("?",dpty),Absp(v,ptm)) in
302 itlist (fun x y -> Combp(Combp(Varp("CONS",dpty),x),y))
303 els (Varp("NIL",dpty)) in
305 let tt = Varp("T",dpty) and ff = Varp("F",dpty) in
306 fun b -> if b then tt else ff in
308 let lis = map (fun i -> pmk_bool((c / (1 lsl i)) mod 2 = 1)) (0--7) in
309 itlist (fun x y -> Combp(y,x)) lis (Varp("ASCII",dpty)) in
311 let ns = map (fun i -> Char.code(String.get s i))
312 (0--(String.length s - 1)) in
313 pmk_list(map pmk_char ns) in
314 let pmk_setcompr (fabs,bvs,babs) =
316 let bod = itlist (curry pmk_exists) bvs
317 (Combp(Combp(Combp(Varp("SETSPEC",dpty),v),babs),fabs)) in
318 Combp(Varp("GSPEC",dpty),Absp(v,bod)) in
319 let pmk_setabs (fabs,babs) =
321 let fvs = pfrees fabs []
322 and bvs = pfrees babs [] in
323 if length fvs <= 1 or bvs = [] then fvs
324 else intersect fvs bvs in
325 pmk_setcompr (fabs,evs,babs) in
326 let rec mk_precedence infxs prs inp =
329 let topins,rest = partition (fun (s',pat') -> pat' = (p,at)) infxs in
330 (if at = "right" then rightbin else leftbin)
331 (mk_precedence rest prs)
332 (end_itlist (||) (map (fun (s,_) -> a (Ident s)) topins))
333 (fun (Ident op) x y -> Combp(Combp(Varp(op,dpty),x),y))
334 ("term after binary operator")
337 let pmk_geq s t = Combp(Combp(Varp("GEQ",dpty),s),t) in
338 let pmk_pattern ((pat,guards),res) =
339 let x = pgenvar() and y = pgenvar() in
340 let vs = pfrees pat []
343 Combp(Combp(Varp("_UNGUARDED_PATTERN",dpty),pmk_geq pat x),
346 Combp(Combp(Combp(Varp("_GUARDED_PATTERN",dpty),pmk_geq pat x),
349 Absp(x,Absp(y,itlist (curry pmk_exists) vs bod)) in
350 let pretype = parse_pretype
353 Ident s::rst when String.length s >= 2 &
354 String.sub s 0 1 = "\"" &
355 String.sub s (String.length s - 1) 1 = "\""
356 -> String.sub s 1 (String.length s - 2),rst
358 and singleton1 x = [x]
359 and lmk_ite (((((_,b),_),l),_),r) =
360 Combp(Combp(Combp(Varp("COND",dpty),b),l),r)
362 function (p,[]) -> p | (p,[ty]) -> Typing(p,ty) | _ -> fail()
363 and lmk_let (((_,bnds),_),ptm) = pmk_let (bnds,ptm)
364 and lmk_binder ((((s,h),t),_),p) = pmk_binder(s,h::t,p)
365 and lmk_setenum(l,_) = pmk_set_enum l
366 and lmk_setabs(((l,_),r),_) = pmk_setabs(l,r)
367 and lmk_setcompr(((((f,_),vs),_),b),_) =
368 pmk_setcompr(f,pfrees vs [],b)
369 and lmk_decimal ((_,l0),ropt) =
370 let l,r = if ropt = [] then l0,"1" else
372 let n_l = num_of_string l0
373 and n_r = num_of_string r0 in
374 let n_d = power_num (Int 10) (Int (String.length r0)) in
375 let n_n = n_l */ n_d +/ n_r in
376 string_of_num n_n,string_of_num n_d in
377 Combp(Combp(Varp("DECIMAL",dpty),Varp(l,dpty)),Varp(r,dpty))
378 and lmk_univ((_,pty),_) =
379 Typing(Varp("UNIV",dpty),Ptycon("fun",[pty;Ptycon("bool",[])]))
382 ((Ident s):: rest) -> s,rest
386 ((Ident s):: rest) ->
387 if can get_infix_status s or is_prefix s or parses_as_binder s
388 then raise Noparse else s,rest
392 ((Ident s):: rest) ->
393 if parses_as_binder s then s,rest else raise Noparse
397 ((Ident s):: rest) ->
398 if is_prefix s then s,rest else raise Noparse
399 | _ -> raise Noparse in
401 mk_precedence (infixes()) typed_appl_preterm i
402 and nocommapreterm i =
403 let infs = filter (fun (s,_) -> s <> ",") (infixes()) in
404 mk_precedence infs typed_appl_preterm i
405 and typed_appl_preterm i =
407 possibly (a (Resword ":") ++ pretype >> snd)
410 (pre_fix ++ appl_preterm
411 >> (fun (x,y) -> Combp(Varp(x,dpty),y))
412 || binder_preterm ++ many binder_preterm >>
413 (fun (h,t) -> itlist (fun x y -> Combp(y,x)) (rev t) h)) i
414 and binder_preterm i =
415 (a (Resword "let") ++
416 leftbin (preterm >> singleton1) (a (Resword "and")) (K (@)) "binding" ++
422 many typed_apreterm ++
427 and typed_apreterm i =
429 possibly (a (Resword ":") ++ pretype >> snd)
431 and atomic_preterm i =
433 || (a (Resword "(") ++ a (Resword ":")) ++ pretype ++ a (Resword ")")
437 || a (Resword "(") ++
438 (any_identifier >> (fun s -> Varp(s,dpty))) ++
441 || a (Resword "(") ++
445 || a (Resword "if") ++
447 a (Resword "then") ++
449 a (Resword "else") ++
452 || a (Resword "[") ++
453 elistof preterm (a (Resword ";")) "term" ++
455 >> (pmk_list o snd o fst)
456 || a (Resword "{") ++
457 (elistof nocommapreterm (a (Ident ",")) "term" ++ a (Resword "}")
459 || preterm ++ a (Resword "|") ++ preterm ++ a (Resword "}")
461 || preterm ++ a (Resword "|") ++ preterm ++
462 a (Resword "|") ++ preterm ++ a (Resword "}")
465 || a (Resword "match") ++ preterm ++ a (Resword "with") ++ clauses
466 >> (fun (((_,e),_),c) -> Combp(Combp(Varp("_MATCH",dpty),e),c))
467 || a (Resword "function") ++ clauses
468 >> (fun (_,c) -> Combp(Varp("_FUNCTION",dpty),c))
469 || a (Ident "#") ++ identifier ++
470 possibly (a (Resword ".") ++ identifier >> snd)
473 >> (fun s -> Varp(s,dpty))) i
475 (preterm ++ possibly (a (Resword "when") ++ preterm >> snd)) i
477 ((pattern ++ (a (Resword "->") ++ preterm >> snd)) >> pmk_pattern) i
479 ((possibly (a (Resword "|")) ++
480 listof clause (a (Resword "|")) "pattern-match clause" >> snd)
481 >> end_itlist (fun s t -> Combp(Combp(Varp("_SEQPATTERN",dpty),s),t))) i in
484 [Ident s] -> Varp(s,dpty),[]
485 | _ -> preterm inp);;
487 (* ------------------------------------------------------------------------- *)
488 (* Type and term parsers. *)
489 (* ------------------------------------------------------------------------- *)
492 let pty,l = (parse_pretype o lex o explode) s in
493 if l = [] then type_of_pretype pty
494 else failwith "Unparsed input following type";;
497 let ptm,l = (parse_preterm o lex o explode) s in
499 (term_of_preterm o (retypecheck [])) ptm
500 else failwith "Unparsed input following term";;
502 let retyvar_funny tm =
503 let tvs = type_vars_in_term tm in
504 let (tvsgood, tvsbad) = List.partition (fun ty -> try (dest_vartype ty).[0] <> '?' with _ -> false) tvs in
505 let tyvno = ref (-1) in
508 let ret = mk_vartype ("?" ^ string_of_int !tyvno) in
509 if List.mem ret tvsgood then nexttv () else ret
511 let ins = List.map (fun x -> (nexttv (),x)) tvsbad in
516 let tm = parse_term s in
517 if type_of tm = bool_ty then retyvar_funny tm else tm