(*****************************************************************************
*
*  mp.ml
*
*  An HOL mechanization of the compiler correctness proof of McCarthy and
*  Painter from 1967.
*
*  From a HOL-4 original by Robert Bauer and Ray Toal
*
*  HOL Light proof by John Harrison, 21st April 2004
*
*****************************************************************************)

(* ------------------------------------------------------------------------- *)
(* Define a type of strings, not already there in HOL Light.                 *)
(* We don't use any particular properties of the type in the proof below.    *)
(* ------------------------------------------------------------------------- *)

let string_INDUCT,string_RECURSION =
  define_type "string = String (int list)";
; (* ------------------------------------------------------------------------- *) (* The definitions from Robert's file. *) (* ------------------------------------------------------------------------- *) (* * The source language * ------------------- * * Syntax: * * The language contains only expressions of three kinds: (1) simple * numeric literals, (2) simple variables, and (3) plus expressions. *)
let exp_INDUCT,exp_RECURSION =
  define_type "exp = Lit num
                   | Var string
                   | Plus exp exp";
; (* * Semantics: * * Expressions evaluated in a state produce a result. There are no * side effects. A state is simply a mapping from variables to * values. The semantic function is called E. *)
let E_DEF = new_recursive_definition exp_RECURSION
           `(E (Lit n) s = n)
        /\  (E (Var v) s = s v)
        /\  (E (Plus e1 e2) s = E e1 s + E e2 s)`;;
(* * The object language * ------------------- * * Syntax: * * The target machine has a single accumulator (Acc) and an infinite * set of numbered registers (Reg 0, Reg 1, Reg 2, and so on). The * accumulator and registers together are called cells. There are four * instructions: LI (load immediate into accumulator), LOAD (load the * contents of a numbered register into the accumulator), STO (store * the accumulator value into a numbered register) and ADD (add the * contents of a numbered register into the accumulator). *)
let cell_INDUCT,cell_RECURSION =
  define_type "cell = Acc
                    | Reg num";
;
let inst_INDUCT,inst_RECURSION =
  define_type "inst = LI num
                    | LOAD num
                    | STO num
                    | ADD num";
; (* * update x z s is the state that is just like s except that x now * maps to z. This definition applies to any kind of state. *)
let update_def =
    new_definition `update x z s y = if (y = x) then z else s y`;;
(* * Semantics: * * First, the semantics of the execution of a single instruction. * The semantic function is called S. Executing an instruction in * a machine state produces a new machine state. Here a machine * state is a mapping from cells to values. *)
let S_DEF = new_recursive_definition inst_RECURSION
           `(S (LI n) s = update Acc n s)
        /\  (S (LOAD r) s = update Acc (s (Reg r)) s)
        /\  (S (STO r) s = update (Reg r) (s Acc) s)
        /\  (S (ADD r) s = update Acc (s (Reg r) + s Acc) s)`;;
(* * Next we give the semantics of a list of instructions with the * semantic function S'. The execution of an intruction list * in an initial state is given by executing the first instruction * in the list in the initial state, which produce a new state s1, * and taking the execution of the rest of the list in s1. *)
let S'_DEF = new_recursive_definition list_RECURSION
           `(S' [] s = s)
        /\  (S' (CONS inst rest) s = S' rest (S inst s))`;;
(* * The compiler * ------------ * * Each source language expression is compiled into a list of * instructions. The compilation is done using a symbol table * which maps source language indentifiers into target machine * register numbers, and a parameter r which tells the next * available free register. *)
let C_DEF = new_recursive_definition exp_RECURSION
            `(C (Lit n) map r = [LI n])
         /\  (C (Var v) map r = [LOAD (map v)])
         /\  (C (Plus e1 e2) map r =
                  APPEND
                      (APPEND (C e1 map r) [STO r])
                      (APPEND (C e2 map (r + 1)) [ADD r]))`;;
(* ------------------------------------------------------------------------- *) (* My key lemmas; UPDATE_DIFFERENT and S'_APPEND are the same as Robert's. *) (* ------------------------------------------------------------------------- *) let cellth = CONJ (distinctness "cell") (injectivity "cell");;
let S'_APPEND = 
prove (`!p1 p2 s. S' (APPEND p1 p2) s = S' p2 (S' p1 s)`,
LIST_INDUCT_TAC THEN ASM_SIMP_TAC[S'_DEF; APPEND]);;
let UPDATE_DIFFERENT = 
prove (`!x y z s. ~(x = y) ==> (update x z s y = s y)`,
SIMP_TAC[update_def]);;
let UPDATE_SAME = 
prove (`!x z s. update x z s x = z`,
SIMP_TAC[update_def]);;
(* * The Correctness Condition * ------------------------- * * The correctness condition is this: * * For every expression e, symbol table map, source state s, * target state s', register number r: * * If all source variables map to registers LESS THAN r, * and if the value of every variable v in s is exactly * the same as the value in s' of the register to which * v is mapped by map, THEN * * When e is compiled with map and first free register r, * and then executed in the state s', in the resulting * machine state S'(C e map r): * * the accumulator will contain E e s and every register * with number x less than r will have the same value as * it does in s'. * * The Proof * --------- * * The proof can be done by induction and careful application of SIMP_TAC[] * using the lemmas isolated above. * * The only "hack" is to throw in GSYM SKOLEM_THM and EXISTS_REFL to dispose * of state existence subgoals of the form `?s. !v. s v = t[v]`, which * otherwise would not be proven automatically by the simplifier. *)
let CORRECTNESS_THEOREM = 
prove (`!e map s s' r. (!v. map v < r) ==> (!v. s v = s' (Reg (map v))) ==> (S' (C e map r) s' Acc = E e s) /\ (!x. (x < r) ==> (S' (C e map r) s' (Reg x) = s' (Reg x)))`,
MATCH_MP_TAC exp_INDUCT THEN REWRITE_TAC[E_DEF; S_DEF; S'_DEF; update_def; C_DEF; S'_APPEND] THEN SIMP_TAC[ARITH_RULE `(x < y ==> x < y + 1 /\ ~(x = y)) /\ x < x + 1`; cellth; UPDATE_SAME; UPDATE_DIFFERENT; GSYM SKOLEM_THM; EXISTS_REFL]);;