1 (*****************************************************************************
5 * An HOL mechanization of the compiler correctness proof of McCarthy and
8 * From a HOL-4 original by Robert Bauer and Ray Toal
10 * HOL Light proof by John Harrison, 21st April 2004
12 *****************************************************************************)
14 (* ------------------------------------------------------------------------- *)
15 (* Define a type of strings, not already there in HOL Light. *)
16 (* We don't use any particular properties of the type in the proof below. *)
17 (* ------------------------------------------------------------------------- *)
19 let string_INDUCT,string_RECURSION =
20 define_type "string = String (int list)";;
22 (* ------------------------------------------------------------------------- *)
23 (* The definitions from Robert's file. *)
24 (* ------------------------------------------------------------------------- *)
32 * The language contains only expressions of three kinds: (1) simple
33 * numeric literals, (2) simple variables, and (3) plus expressions.
36 let exp_INDUCT,exp_RECURSION =
37 define_type "exp = Lit num
44 * Expressions evaluated in a state produce a result. There are no
45 * side effects. A state is simply a mapping from variables to
46 * values. The semantic function is called E.
49 let E_DEF = new_recursive_definition exp_RECURSION
51 /\ (E (Var v) s = s v)
52 /\ (E (Plus e1 e2) s = E e1 s + E e2 s)`;;
60 * The target machine has a single accumulator (Acc) and an infinite
61 * set of numbered registers (Reg 0, Reg 1, Reg 2, and so on). The
62 * accumulator and registers together are called cells. There are four
63 * instructions: LI (load immediate into accumulator), LOAD (load the
64 * contents of a numbered register into the accumulator), STO (store
65 * the accumulator value into a numbered register) and ADD (add the
66 * contents of a numbered register into the accumulator).
69 let cell_INDUCT,cell_RECURSION =
70 define_type "cell = Acc
73 let inst_INDUCT,inst_RECURSION =
74 define_type "inst = LI num
80 * update x z s is the state that is just like s except that x now
81 * maps to z. This definition applies to any kind of state.
85 new_definition `update x z s y = if (y = x) then z else s y`;;
90 * First, the semantics of the execution of a single instruction.
91 * The semantic function is called S. Executing an instruction in
92 * a machine state produces a new machine state. Here a machine
93 * state is a mapping from cells to values.
96 let S_DEF = new_recursive_definition inst_RECURSION
97 `(S (LI n) s = update Acc n s)
98 /\ (S (LOAD r) s = update Acc (s (Reg r)) s)
99 /\ (S (STO r) s = update (Reg r) (s Acc) s)
100 /\ (S (ADD r) s = update Acc (s (Reg r) + s Acc) s)`;;
103 * Next we give the semantics of a list of instructions with the
104 * semantic function S'. The execution of an intruction list
105 * in an initial state is given by executing the first instruction
106 * in the list in the initial state, which produce a new state s1,
107 * and taking the execution of the rest of the list in s1.
110 let S'_DEF = new_recursive_definition list_RECURSION
112 /\ (S' (CONS inst rest) s = S' rest (S inst s))`;;
118 * Each source language expression is compiled into a list of
119 * instructions. The compilation is done using a symbol table
120 * which maps source language indentifiers into target machine
121 * register numbers, and a parameter r which tells the next
122 * available free register.
125 let C_DEF = new_recursive_definition exp_RECURSION
126 `(C (Lit n) map r = [LI n])
127 /\ (C (Var v) map r = [LOAD (map v)])
128 /\ (C (Plus e1 e2) map r =
130 (APPEND (C e1 map r) [STO r])
131 (APPEND (C e2 map (r + 1)) [ADD r]))`;;
133 (* ------------------------------------------------------------------------- *)
134 (* My key lemmas; UPDATE_DIFFERENT and S'_APPEND are the same as Robert's. *)
135 (* ------------------------------------------------------------------------- *)
137 let cellth = CONJ (distinctness "cell") (injectivity "cell");;
139 let S'_APPEND = prove
140 (`!p1 p2 s. S' (APPEND p1 p2) s = S' p2 (S' p1 s)`,
141 LIST_INDUCT_TAC THEN ASM_SIMP_TAC[S'_DEF; APPEND]);;
143 let UPDATE_DIFFERENT = prove
144 (`!x y z s. ~(x = y) ==> (update x z s y = s y)`,
145 SIMP_TAC[update_def]);;
147 let UPDATE_SAME = prove
148 (`!x z s. update x z s x = z`,
149 SIMP_TAC[update_def]);;
152 * The Correctness Condition
153 * -------------------------
155 * The correctness condition is this:
157 * For every expression e, symbol table map, source state s,
158 * target state s', register number r:
160 * If all source variables map to registers LESS THAN r,
161 * and if the value of every variable v in s is exactly
162 * the same as the value in s' of the register to which
163 * v is mapped by map, THEN
165 * When e is compiled with map and first free register r,
166 * and then executed in the state s', in the resulting
167 * machine state S'(C e map r):
169 * the accumulator will contain E e s and every register
170 * with number x less than r will have the same value as
176 * The proof can be done by induction and careful application of SIMP_TAC[]
177 * using the lemmas isolated above.
179 * The only "hack" is to throw in GSYM SKOLEM_THM and EXISTS_REFL to dispose
180 * of state existence subgoals of the form `?s. !v. s v = t[v]`, which
181 * otherwise would not be proven automatically by the simplifier.
184 let CORRECTNESS_THEOREM = prove
187 (!v. s v = s' (Reg (map v))) ==>
188 (S' (C e map r) s' Acc = E e s) /\
189 (!x. (x < r) ==> (S' (C e map r) s' (Reg x) = s' (Reg x)))`,
190 MATCH_MP_TAC exp_INDUCT THEN
191 REWRITE_TAC[E_DEF; S_DEF; S'_DEF; update_def; C_DEF; S'_APPEND] THEN
192 SIMP_TAC[ARITH_RULE `(x < y ==> x < y + 1 /\ ~(x = y)) /\ x < x + 1`; cellth;
193 UPDATE_SAME; UPDATE_DIFFERENT; GSYM SKOLEM_THM; EXISTS_REFL]);;