section {* Deciding Regular Expression Equivalence *} theory Equivalence_Checking imports NDerivative "~~/src/HOL/Library/While_Combinator" begin subsection {* Bisimulation between languages and regular expressions *} coinductive bisimilar :: "'a lang ⇒ 'a lang ⇒ bool" where "([] ∈ K ⟷ [] ∈ L) ⟹ (⋀x. bisimilar (Deriv x K) (Deriv x L)) ⟹ bisimilar K L" lemma equal_if_bisimilar: assumes "bisimilar K L" shows "K = L" proof (rule set_eqI) fix w from `bisimilar K L` show "w ∈ K ⟷ w ∈ L" proof (induct w arbitrary: K L) case Nil thus ?case by (auto elim: bisimilar.cases) next case (Cons a w K L) from `bisimilar K L` have "bisimilar (Deriv a K) (Deriv a L)" by (auto elim: bisimilar.cases) then have "w ∈ Deriv a K ⟷ w ∈ Deriv a L" by (rule Cons(1)) thus ?case by (auto simp: Deriv_def) qed qed lemma language_coinduct: fixes R (infixl "∼" 50) assumes "K ∼ L" assumes "⋀K L. K ∼ L ⟹ ([] ∈ K ⟷ [] ∈ L)" assumes "⋀K L x. K ∼ L ⟹ Deriv x K ∼ Deriv x L" shows "K = L" apply (rule equal_if_bisimilar) apply (rule bisimilar.coinduct[of R, OF `K ∼ L`]) apply (auto simp: assms) done type_synonym 'a rexp_pair = "'a rexp * 'a rexp" type_synonym 'a rexp_pairs = "'a rexp_pair list" definition is_bisimulation :: "'a::order list ⇒ 'a rexp_pair set ⇒ bool" where "is_bisimulation as R = (∀(r,s)∈ R. (atoms r ∪ atoms s ⊆ set as) ∧ (nullable r ⟷ nullable s) ∧ (∀a∈set as. (nderiv a r, nderiv a s) ∈ R))" lemma bisim_lang_eq: assumes bisim: "is_bisimulation as ps" assumes "(r, s) ∈ ps" shows "lang r = lang s" proof - def ps' ≡ "insert (Zero, Zero) ps" from bisim have bisim': "is_bisimulation as ps'" by (auto simp: ps'_def is_bisimulation_def) let ?R = "λK L. (∃(r,s)∈ps'. K = lang r ∧ L = lang s)" show ?thesis proof (rule language_coinduct[where R="?R"]) from `(r, s) ∈ ps` have "(r, s) ∈ ps'" by (auto simp: ps'_def) thus "?R (lang r) (lang s)" by auto next fix K L assume "?R K L" then obtain r s where rs: "(r, s) ∈ ps'" and KL: "K = lang r" "L = lang s" by auto with bisim' have "nullable r ⟷ nullable s" by (auto simp: is_bisimulation_def) thus "[] ∈ K ⟷ [] ∈ L" by (auto simp: nullable_iff KL) fix a show "?R (Deriv a K) (Deriv a L)" proof cases assume "a ∈ set as" with rs bisim' have "(nderiv a r, nderiv a s) ∈ ps'" by (auto simp: is_bisimulation_def) thus ?thesis by (force simp: KL lang_nderiv) next assume "a ∉ set as" with bisim' rs have "a ∉ atoms r" "a ∉ atoms s" by (auto simp: is_bisimulation_def) then have "nderiv a r = Zero" "nderiv a s = Zero" by (auto intro: deriv_no_occurrence) then have "Deriv a K = lang Zero" "Deriv a L = lang Zero" unfolding KL lang_nderiv[symmetric] by auto thus ?thesis by (auto simp: ps'_def) qed qed qed subsection {* Closure computation *} definition closure :: "'a::order list ⇒ 'a rexp_pair ⇒ ('a rexp_pairs * 'a rexp_pair set) option" where "closure as = rtrancl_while (%(r,s). nullable r = nullable s) (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)" definition pre_bisim :: "'a::order list ⇒ 'a rexp ⇒ 'a rexp ⇒ 'a rexp_pairs * 'a rexp_pair set ⇒ bool" where "pre_bisim as r s = (λ(ws,R). (r,s) ∈ R ∧ set ws ⊆ R ∧ (∀(r,s)∈ R. atoms r ∪ atoms s ⊆ set as) ∧ (∀(r,s)∈ R - set ws. (nullable r ⟷ nullable s) ∧ (∀a∈set as. (nderiv a r, nderiv a s) ∈ R)))" theorem closure_sound: assumes result: "closure as (r,s) = Some([],R)" and atoms: "atoms r ∪ atoms s ⊆ set as" shows "lang r = lang s" proof- let ?test = "While_Combinator.rtrancl_while_test (%(r,s). nullable r = nullable s)" let ?step = "While_Combinator.rtrancl_while_step (%(r,s). map (λa. (nderiv a r, nderiv a s)) as)" { fix st assume inv: "pre_bisim as r s st" and test: "?test st" have "pre_bisim as r s (?step st)" proof (cases st) fix ws R assume "st = (ws, R)" with test obtain r s t where st: "st = ((r, s) # t, R)" and "nullable r = nullable s" by (cases ws) auto with inv show ?thesis using atoms_nderiv[of _ r] atoms_nderiv[of _ s] unfolding st rtrancl_while_test.simps rtrancl_while_step.simps pre_bisim_def Ball_def by simp_all blast+ qed } moreover from atoms have "pre_bisim as r s ([(r,s)],{(r,s)})" by (simp add: pre_bisim_def) ultimately have pre_bisim_ps: "pre_bisim as r s ([],R)" by (rule while_option_rule[OF _ result[unfolded closure_def rtrancl_while_def]]) then have "is_bisimulation as R" "(r, s) ∈ R" by (auto simp: pre_bisim_def is_bisimulation_def) thus "lang r = lang s" by (rule bisim_lang_eq) qed subsection {* Bisimulation-free proof of closure computation *} text{* The equivalence check can be viewed as the product construction of two automata. The state space is the reflexive transitive closure of the pair of next-state functions, i.e. derivatives. *} lemma rtrancl_nderiv_nderivs: defines "nderivs == foldl (%r a. nderiv a r)" shows "{((r,s),(nderiv a r,nderiv a s))| r s a. a : A}^* = {((r,s),(nderivs r w,nderivs s w))| r s w. w : lists A}" (is "?L = ?R") proof- note [simp] = nderivs_def { fix r s r' s' have "((r,s),(r',s')) : ?L ⟹ ((r,s),(r',s')) : ?R" proof(induction rule: converse_rtrancl_induct2) case refl show ?case by (force intro!: foldl.simps(1)[symmetric]) next case step thus ?case by(force intro!: foldl.simps(2)[symmetric]) qed } moreover { fix r s r' s' { fix w have "∀x∈set w. x ∈ A ⟹ ((r, s), nderivs r w, nderivs s w) :?L" proof(induction w rule: rev_induct) case Nil show ?case by simp next case snoc thus ?case by (auto elim!: rtrancl_into_rtrancl) qed } hence "((r,s),(r',s')) : ?R ⟹ ((r,s),(r',s')) : ?L" by auto } ultimately show ?thesis by (auto simp: in_lists_conv_set) blast qed lemma nullable_nderivs: "nullable (foldl (%r a. nderiv a r) r w) = (w : lang r)" by (induct w arbitrary: r) (simp_all add: nullable_iff lang_nderiv Deriv_def) theorem closure_sound_complete: assumes result: "closure as (r,s) = Some(ws,R)" and atoms: "set as = atoms r ∪ atoms s" shows "ws = [] ⟷ lang r = lang s" proof - have leq: "(lang r = lang s) = (∀(r',s') ∈ {((r0,s0),(nderiv a r0,nderiv a s0))| r0 s0 a. a : set as}^* `` {(r,s)}. nullable r' = nullable s')" by(simp add: atoms rtrancl_nderiv_nderivs Ball_def lang_eq_ext imp_ex nullable_nderivs del:Un_iff) have "{(x,y). y ∈ set ((λ(p,q). map (λa. (nderiv a p, nderiv a q)) as) x)} = {((r,s), nderiv a r, nderiv a s) |r s a. a ∈ set as}" by auto with atoms rtrancl_while_Some[OF result[unfolded closure_def]] show ?thesis by (auto simp add: leq Ball_def split: if_splits) qed subsection {* The overall procedure *} primrec add_atoms :: "'a rexp ⇒ 'a list ⇒ 'a list" where "add_atoms Zero = id" | "add_atoms One = id" | "add_atoms (Atom a) = List.insert a" | "add_atoms (Plus r s) = add_atoms s o add_atoms r" | "add_atoms (Times r s) = add_atoms s o add_atoms r" | "add_atoms (Star r) = add_atoms r" lemma set_add_atoms: "set (add_atoms r as) = atoms r ∪ set as" by (induct r arbitrary: as) auto definition check_eqv :: "nat rexp ⇒ nat rexp ⇒ bool" where "check_eqv r s = (let nr = norm r; ns = norm s; as = add_atoms nr (add_atoms ns []) in case closure as (nr, ns) of Some([],_) ⇒ True | _ ⇒ False)" lemma soundness: assumes "check_eqv r s" shows "lang r = lang s" proof - let ?nr = "norm r" let ?ns = "norm s" let ?as = "add_atoms ?nr (add_atoms ?ns [])" obtain R where 1: "closure ?as (?nr,?ns) = Some([],R)" using assms by (auto simp: check_eqv_def Let_def split:option.splits list.splits) then have "lang (norm r) = lang (norm s)" by (rule closure_sound) (auto simp: set_add_atoms dest!: subsetD[OF atoms_norm]) thus "lang r = lang s" by simp qed text{* Test: *} lemma "check_eqv (Plus One (Times (Atom 0) (Star(Atom 0)))) (Star(Atom 0))" by eval end