(*****************************************************************************
*
*  mp.ml
*
*  An HOL mechanization of the compiler correctness proof of McCarthy and
*  Painter from 1967.
*
*  From a HOL-4 original by Robert Bauer and Ray Toal
*
*  HOL Light proof by John Harrison, 21st April 2004
*
*****************************************************************************)
(* ------------------------------------------------------------------------- *)
(* Define a type of strings, not already there in HOL Light.                 *)
(* We don't use any particular properties of the type in the proof below.    *)
(* ------------------------------------------------------------------------- *)
;
(* ------------------------------------------------------------------------- *)
(* The definitions from Robert's file.                                       *)
(* ------------------------------------------------------------------------- *)
(*
 *  The source language
 *  -------------------
 *
 *  Syntax:
 *
 *  The language contains only expressions of three kinds: (1) simple
 *  numeric literals, (2) simple variables, and (3) plus expressions.
 *)
;
(*
 *  Semantics:
 *
 *  Expressions evaluated in a state produce a result.  There are no
 *  side effects.  A state is simply a mapping from variables to
 *  values.  The semantic function is called E.
 *)
let E_DEF = new_recursive_definition exp_RECURSION
           `(E (Lit n) s = n)
        /\  (E (Var v) s = s v)
        /\  (E (Plus e1 e2) s = E e1 s + E e2 s)`;; 
(*
 *  The object language
 *  -------------------
 *
 *  Syntax:
 *
 *  The target machine has a single accumulator (Acc) and an infinite
 *  set of numbered registers (Reg 0, Reg 1, Reg 2, and so on).  The
 *  accumulator and registers together are called cells.  There are four
 *  instructions: LI (load immediate into accumulator), LOAD (load the
 *  contents of a numbered register into the accumulator), STO (store
 *  the accumulator value into a numbered register) and ADD (add the
 *  contents of a numbered register into the accumulator).
 *)
;
;
(*
 *  update x z s is the state that is just like s except that x now
 *  maps to z.  This definition applies to any kind of state.
 *)
let update_def =
    new_definition `update x z s y = if (y = x) then z else s y`;; 
(*
 *  Semantics:
 *
 *  First, the semantics of the execution of a single instruction.
 *  The semantic function is called S.  Executing an instruction in
 *  a machine state produces a new machine state.  Here a machine
 *  state is a mapping from cells to values.
 *)
let S_DEF = new_recursive_definition inst_RECURSION
           `(S (LI n) s = update Acc n s)
        /\  (S (LOAD r) s = update Acc (s (Reg r)) s)
        /\  (S (STO r) s = update (Reg r) (s Acc) s)
        /\  (S (ADD r) s = update Acc (s (Reg r) + s Acc) s)`;; 
(*
 *  Next we give the semantics of a list of instructions with the
 *  semantic function S'.  The execution of an intruction list
 *  in an initial state is given by executing the first instruction
 *  in the list in the initial state, which produce a new state s1,
 *  and taking the execution of the rest of the list in s1.
 *)
(*
 *  The compiler
 *  ------------
 *
 *  Each source language expression is compiled into a list of
 *  instructions.  The compilation is done using a symbol table
 *  which maps source language indentifiers into target machine
 *  register numbers, and a parameter r which tells the next
 *  available free register.
 *)
let C_DEF = new_recursive_definition exp_RECURSION
            `(C (Lit n) map r = [LI n])
         /\  (C (Var v) map r = [LOAD (map v)])
         /\  (C (Plus e1 e2) map r =
                  APPEND
                      (APPEND (C e1 map r) [STO r])
                      (APPEND (C e2 map (r + 1)) [ADD r]))`;; 
(* ------------------------------------------------------------------------- *)
(* My key lemmas; UPDATE_DIFFERENT and S'_APPEND are the same as Robert's.   *)
(* ------------------------------------------------------------------------- *)
let cellth = CONJ (distinctness "cell") (injectivity "cell");;
(*
 *  The Correctness Condition
 *  -------------------------
 *
 *  The correctness condition is this:
 *
 *  For every expression e, symbol table map, source state s,
 *      target state s', register number r:
 *
 *  If all source variables map to registers LESS THAN r,
 *  and if the value of every variable v in s is exactly
 *  the same as the value in s' of the register to which
 *  v is mapped by map, THEN
 *
 *  When e is compiled with map and first free register r,
 *  and then executed in the state s', in the resulting
 *  machine state S'(C e map r):
 *
 *  the accumulator will contain E e s and every register
 *  with number x less than r will have the same value as
 *  it does in s'.
 *
 *  The Proof
 *  ---------
 *
 *  The proof can be done by induction and careful application of SIMP_TAC[]
 *  using the lemmas isolated above.
 *
 *  The only "hack" is to throw in GSYM SKOLEM_THM and EXISTS_REFL to dispose
 *  of state existence subgoals of the form `?s. !v. s v = t[v]`, which
 *  otherwise would not be proven automatically by the simplifier.
 *)
let CORRECTNESS_THEOREM = prove
 (`!e map s s' r.                                                      
      (!v. map v < r) ==>                                              
      (!v. s v = s' (Reg (map v))) ==>                                 
          (S' (C e map r) s' Acc = E e s) /\                           
          (!x. (x < r) ==> (S' (C e map r) s' (Reg x) = s' (Reg x)))`,