diff --git a/404.html b/404.html index 2c7a1a06..8938ae82 100644 --- a/404.html +++ b/404.html @@ -13,7 +13,7 @@ - +
diff --git a/assets/js/6b1d1fec.7125dbed.js b/assets/js/6b1d1fec.f368363f.js similarity index 96% rename from assets/js/6b1d1fec.7125dbed.js rename to assets/js/6b1d1fec.f368363f.js index 41080b17..97eca40c 100644 --- a/assets/js/6b1d1fec.7125dbed.js +++ b/assets/js/6b1d1fec.f368363f.js @@ -1 +1 @@ -"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5993],{3331:(e,n,i)=>{i.r(n),i.d(n,{assets:()=>l,contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>a,toc:()=>c});var t=i(4848),s=i(8453);const o={title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},r=void 0,a={permalink:"/blog/2024/06/05/formal-verification-for-software-correctness",source:"@site/blog/2024-06-05-formal-verification-for-software-correctness.md",title:"\ud83e\udd84 Software correctness from first principles",description:"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.",date:"2024-06-05T00:00:00.000Z",formattedDate:"June 5, 2024",tags:[{label:"formal verification",permalink:"/blog/tags/formal-verification"},{label:"software correctness",permalink:"/blog/tags/software-correctness"},{label:"first principles",permalink:"/blog/tags/first-principles"},{label:"example",permalink:"/blog/tags/example"},{label:"Python",permalink:"/blog/tags/python"}],readingTime:7.265,hasTruncateMarker:!0,authors:[],frontMatter:{title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},unlisted:!1,nextItem:{title:"\ud83e\udd84 Software correctness from first principles",permalink:"/blog/2024/06/05/software-correctness-from-first-principles"}},l={authorsImageUrls:[]},c=[{value:"Use of formal verification",id:"use-of-formal-verification",level:2},{value:"Definition of programming languages",id:"definition-of-programming-languages",level:2},{value:"Example to verify",id:"example-to-verify",level:2},{value:"Formal verification",id:"formal-verification",level:2},{value:"Completing the property",id:"completing-the-property",level:2},{value:"Conclusion",id:"conclusion",level:2}];function h(e){const n={a:"a",admonition:"admonition",blockquote:"blockquote",code:"code",em:"em",h2:"h2",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,s.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(n.p,{children:[(0,t.jsx)(n.strong,{children:"Formal verification"})," is a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves."]}),"\n",(0,t.jsx)(n.p,{children:"We will present this idea in detail and illustrate how it works for a very simple example."}),"\n",(0,t.jsx)(n.h2,{id:"use-of-formal-verification",children:"Use of formal verification"}),"\n",(0,t.jsx)(n.p,{children:"We typically use formal verification for critical applications, where either:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"life is at stake, like in the case of trains, airplanes, medical devices, or"}),"\n",(0,t.jsx)(n.li,{children:"money is at stake, like in the case of financial applications."}),"\n"]}),"\n",(0,t.jsxs)(n.p,{children:["With formal verification, in theory, ",(0,t.jsx)(n.strong,{children:"we can guarantee that the software will never fail"}),", as we can check ",(0,t.jsx)(n.strong,{children:"all possible cases"})," for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions."]}),"\n",(0,t.jsxs)(n.p,{children:["In this research paper ",(0,t.jsx)(n.a,{href:"https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf",children:"Finding and Understanding Bugs in C Compilers"}),", no bugs were found in the middle-end of the formally verified ",(0,t.jsx)(n.a,{href:"https://en.wikipedia.org/wiki/CompCert",children:"CompCert"})," C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!"]}),"\n",(0,t.jsx)(n.h2,{id:"definition-of-programming-languages",children:"Definition of programming languages"}),"\n",(0,t.jsxs)(n.p,{children:["To be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. For example, in Python, the ",(0,t.jsx)(n.code,{children:"if"})," statement is ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/compound_stmts.html#if",children:"defined in the reference manual"})," by:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:'if_stmt ::= "if" assignment_expression ":" suite\n ("elif" assignment_expression ":" suite)*\n ["else" ":" suite]\n'})}),"\n",(0,t.jsxs)(n.blockquote,{children:["\n",(0,t.jsx)(n.p,{children:"It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"This means that the Python code:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if condition:\n a\nelse:\n b\n"})}),"\n",(0,t.jsxs)(n.p,{children:["will execute ",(0,t.jsx)(n.code,{children:"a"})," when the ",(0,t.jsx)(n.code,{children:"condition"})," is true, and ",(0,t.jsx)(n.code,{children:"b"})," otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...)."]}),"\n",(0,t.jsx)(n.p,{children:"To make these rules more manageable, we generally split them into two parts:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/grammar.html",children:"grammar"}),"."]}),"\n",(0,t.jsxs)(n.li,{children:["The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the ",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:'In formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating "syntax errors" in case of ill-formed programs.'}),"\n",(0,t.jsx)(n.h2,{id:"example-to-verify",children:"Example to verify"}),"\n",(0,t.jsx)(n.p,{children:"We consider this short Python example of a function returning the maximum number in a list:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n for x in l:\n if x > m:\n m = x\n return m\n"})}),"\n",(0,t.jsxs)(n.p,{children:["We assume that the list ",(0,t.jsx)(n.code,{children:"l"})," is not empty and only contains integers. If we run it on a few examples:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"my_max([1, 2, 3]) # => 3\nmy_max([3, 2, 1]) # => 3\nmy_max([1, 3, 2]) # => 3\n"})}),"\n",(0,t.jsxs)(n.p,{children:["it always returns\xa0",(0,t.jsx)(n.code,{children:"3"}),", the biggest number in the list! But can we make sure this is always the case?"]}),"\n",(0,t.jsxs)(n.p,{children:["We can certainly not run\xa0",(0,t.jsx)(n.code,{children:"my_max"})," on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning."]}),"\n",(0,t.jsx)(n.h2,{id:"formal-verification",children:"Formal verification"}),"\n",(0,t.jsxs)(n.p,{children:["Here is a general specification that we give of the\xa0",(0,t.jsx)(n.code,{children:"my_max"})," function above:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (index : int) (l : list[int]),\n 0 \u2264 index < len(l) \u21d2\n l[index] \u2264 my_max(l)\n"})}),"\n",(0,t.jsxs)(n.p,{children:["It says that for all integer ",(0,t.jsx)(n.code,{children:"index"})," and list of integers ",(0,t.jsx)(n.code,{children:"l"}),", if the index is valid (between\xa0",(0,t.jsx)(n.code,{children:"0"})," and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute."]}),"\n",(0,t.jsxs)(n.p,{children:["To verify this property for all possible list\xa0",(0,t.jsx)(n.code,{children:"l"}),", we reason by induction. A non-empty list is either:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"a list with one element, where the maximum is the only element, or"}),"\n",(0,t.jsx)(n.li,{children:"a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"At the start of the code, we will always have:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n"})}),"\n",(0,t.jsxs)(n.p,{children:["with ",(0,t.jsx)(n.code,{children:"m"})," being equal to the first item of the list. Then:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["If the list has only one element, we iterate only once in the ",(0,t.jsx)(n.code,{children:"for"})," loop, with ",(0,t.jsx)(n.code,{children:"x"})," equal to ",(0,t.jsx)(n.code,{children:"l[0]"}),". The condition:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n"})}),"\n","is then equivalent to:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if l[0] > l[0]:\n"})}),"\n","and is always false. We then return ",(0,t.jsx)(n.code,{children:"m = l[0]"}),", which is the only element of the list, and it verifies our property as:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[0] \u2264 l[0]\n"})}),"\n"]}),"\n",(0,t.jsxs)(n.li,{children:["If the list has at least two elements, we unroll the code execution of the ",(0,t.jsx)(n.code,{children:"for"})," loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[index] \u2264 m\n"})}),"\n","for all ",(0,t.jsx)(n.code,{children:"index"})," between ",(0,t.jsx)(n.code,{children:"0"})," and ",(0,t.jsx)(n.code,{children:"len(l) - 2"}),". When we reach the last element, we have:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n m = x\n"})}),"\n","with ",(0,t.jsx)(n.code,{children:"x"})," being ",(0,t.jsx)(n.code,{children:"l[len(l) - 1]"}),". There are two possibilities. Either ",(0,t.jsx)(n.em,{children:"(i)"})," ",(0,t.jsx)(n.code,{children:"x"})," is less than or equal to ",(0,t.jsx)(n.code,{children:"m"}),", and we do not update ",(0,t.jsx)(n.code,{children:"m"}),", or ",(0,t.jsx)(n.em,{children:"(ii)"})," ",(0,t.jsx)(n.code,{children:"x"})," is greater than ",(0,t.jsx)(n.code,{children:"m"}),", and we update ",(0,t.jsx)(n.code,{children:"m"})," to ",(0,t.jsx)(n.code,{children:"x"}),". In both cases, the property is verified for the last element of the list, as:","\n",(0,t.jsxs)(n.ol,{children:["\n",(0,t.jsxs)(n.li,{children:["In the first case, ",(0,t.jsx)(n.code,{children:"m"})," stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last\xa0",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n",(0,t.jsxs)(n.li,{children:["In the second case, ",(0,t.jsx)(n.code,{children:"m"})," is updated to ",(0,t.jsx)(n.code,{children:"x"}),", which is the last element of the list and a greater value than the original\xa0",(0,t.jsx)(n.code,{children:"m"}),". Then it means that ",(0,t.jsx)(n.code,{children:"m"})," is still larger or equal to all the elements of the list except the last one, being larger that the original\xa0",(0,t.jsx)(n.code,{children:"m"}),", and larger or equal to the last one as it is in fact equals to the last one."]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"We have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code."}),"\n",(0,t.jsxs)(n.p,{children:["In practice, with formal verification, the reasoning above is done in a proof assistance such as ",(0,t.jsx)(n.a,{href:"https://coq.inria.fr/",children:"Coq"})," to help making sure that we did not forget any case, and automatically solve simple cases for us. Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details."]}),"\n",(0,t.jsx)(n.h2,{id:"completing-the-property",children:"Completing the property"}),"\n",(0,t.jsx)(n.p,{children:"An additional property that we did not verify is:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (l : list[int]),\n exists (index : int),\n 0 \u2264 index < len(l) and\n l[index] = my_max(l)\n"})}),"\n",(0,t.jsx)(n.p,{children:"It says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise."}),"\n",(0,t.jsx)(n.admonition,{title:"For more",type:"info",children:(0,t.jsxs)(n.p,{children:["If you want to go into more details for the formal verification of Python programs, you can look at our ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-python",children:"coq-of-python"})," project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-rust",children:"Rust"})," and other languages like ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-ocaml",children:"OCaml"}),". Contact us at\xa0",(0,t.jsx)(n.a,{href:"mailto:contact@formal.land",children:"contact@formal.land"})," to discuss!"]})}),"\n",(0,t.jsx)(n.h2,{id:"conclusion",children:"Conclusion"}),"\n",(0,t.jsxs)(n.p,{children:["We have presented here the idea of ",(0,t.jsx)(n.strong,{children:"formal verification"}),", a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct ",(0,t.jsx)(n.strong,{children:"for all possible lists of integers"}),"."]}),"\n",(0,t.jsx)(n.p,{children:"We will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!"})]})}function d(e={}){const{wrapper:n}={...(0,s.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(h,{...e})}):h(e)}},8453:(e,n,i)=>{i.d(n,{R:()=>r,x:()=>a});var t=i(6540);const s={},o=t.createContext(s);function r(e){const n=t.useContext(o);return t.useMemo((function(){return"function"==typeof e?e(n):{...n,...e}}),[n,e])}function a(e){let n;return n=e.disableParentContext?"function"==typeof e.components?e.components(s):e.components||s:r(e.components),t.createElement(o.Provider,{value:n},e.children)}}}]); \ No newline at end of file +"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5993],{3331:(e,n,i)=>{i.r(n),i.d(n,{assets:()=>l,contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>a,toc:()=>c});var t=i(4848),s=i(8453);const o={title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},r=void 0,a={permalink:"/blog/2024/06/05/formal-verification-for-software-correctness",source:"@site/blog/2024-06-05-formal-verification-for-software-correctness.md",title:"\ud83e\udd84 Software correctness from first principles",description:"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.",date:"2024-06-05T00:00:00.000Z",formattedDate:"June 5, 2024",tags:[{label:"formal verification",permalink:"/blog/tags/formal-verification"},{label:"software correctness",permalink:"/blog/tags/software-correctness"},{label:"first principles",permalink:"/blog/tags/first-principles"},{label:"example",permalink:"/blog/tags/example"},{label:"Python",permalink:"/blog/tags/python"}],readingTime:7.265,hasTruncateMarker:!0,authors:[],frontMatter:{title:"\ud83e\udd84 Software correctness from first principles",tags:["formal verification","software correctness","first principles","example","Python"],authors:[]},unlisted:!1,nextItem:{title:"\ud83e\udd84 Software correctness from first principles",permalink:"/blog/2024/06/05/software-correctness-from-first-principles"}},l={authorsImageUrls:[]},c=[{value:"Use of formal verification",id:"use-of-formal-verification",level:2},{value:"Definition of programming languages",id:"definition-of-programming-languages",level:2},{value:"Example to verify",id:"example-to-verify",level:2},{value:"Formal verification",id:"formal-verification",level:2},{value:"Completing the property",id:"completing-the-property",level:2},{value:"Conclusion",id:"conclusion",level:2}];function h(e){const n={a:"a",admonition:"admonition",blockquote:"blockquote",code:"code",em:"em",h2:"h2",li:"li",ol:"ol",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,s.R)(),...e.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(n.p,{children:[(0,t.jsx)(n.strong,{children:"Formal verification"})," is a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves."]}),"\n",(0,t.jsx)(n.p,{children:"We will present this idea in detail and illustrate how it works for a very simple example."}),"\n",(0,t.jsx)(n.h2,{id:"use-of-formal-verification",children:"Use of formal verification"}),"\n",(0,t.jsx)(n.p,{children:"We typically use formal verification for critical applications, where either:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"life is at stake, like in the case of trains, airplanes, medical devices, or"}),"\n",(0,t.jsx)(n.li,{children:"money is at stake, like in the case of financial applications."}),"\n"]}),"\n",(0,t.jsxs)(n.p,{children:["With formal verification, in theory, ",(0,t.jsx)(n.strong,{children:"we can guarantee that the software will never fail"}),", as we can check ",(0,t.jsx)(n.strong,{children:"all possible cases"})," for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions."]}),"\n",(0,t.jsxs)(n.p,{children:["In this research paper ",(0,t.jsx)(n.a,{href:"https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf",children:"Finding and Understanding Bugs in C Compilers"}),", no bugs were found in the middle-end of the formally verified ",(0,t.jsx)(n.a,{href:"https://en.wikipedia.org/wiki/CompCert",children:"CompCert"})," C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!"]}),"\n",(0,t.jsx)(n.h2,{id:"definition-of-programming-languages",children:"Definition of programming languages"}),"\n",(0,t.jsxs)(n.p,{children:["To be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. For example, in Python, the ",(0,t.jsx)(n.code,{children:"if"})," statement is ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/compound_stmts.html#if",children:"defined in the reference manual"})," by:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:'if_stmt ::= "if" assignment_expression ":" suite\n ("elif" assignment_expression ":" suite)*\n ["else" ":" suite]\n'})}),"\n",(0,t.jsxs)(n.blockquote,{children:["\n",(0,t.jsx)(n.p,{children:"It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"This means that the Python code:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if condition:\n a\nelse:\n b\n"})}),"\n",(0,t.jsxs)(n.p,{children:["will execute ",(0,t.jsx)(n.code,{children:"a"})," when the ",(0,t.jsx)(n.code,{children:"condition"})," is true, and ",(0,t.jsx)(n.code,{children:"b"})," otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...)."]}),"\n",(0,t.jsx)(n.p,{children:"To make these rules more manageable, we generally split them into two parts:"}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the ",(0,t.jsx)(n.a,{href:"https://docs.python.org/3/reference/grammar.html",children:"grammar"}),"."]}),"\n",(0,t.jsxs)(n.li,{children:["The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the ",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:'In formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating "syntax errors" in case of ill-formed programs.'}),"\n",(0,t.jsx)(n.h2,{id:"example-to-verify",children:"Example to verify"}),"\n",(0,t.jsx)(n.p,{children:"We consider this short Python example of a function returning the maximum number in a list:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n for x in l:\n if x > m:\n m = x\n return m\n"})}),"\n",(0,t.jsxs)(n.p,{children:["We assume that the list ",(0,t.jsx)(n.code,{children:"l"})," is not empty and only contains integers. If we run it on a few examples:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"my_max([1, 2, 3]) # => 3\nmy_max([3, 2, 1]) # => 3\nmy_max([1, 3, 2]) # => 3\n"})}),"\n",(0,t.jsxs)(n.p,{children:["it always returns\xa0",(0,t.jsx)(n.code,{children:"3"}),", the biggest number in the list! But can we make sure this is always the case?"]}),"\n",(0,t.jsxs)(n.p,{children:["We can certainly not run\xa0",(0,t.jsx)(n.code,{children:"my_max"})," on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning."]}),"\n",(0,t.jsx)(n.h2,{id:"formal-verification",children:"Formal verification"}),"\n",(0,t.jsxs)(n.p,{children:["Here is a general specification that we give of the\xa0",(0,t.jsx)(n.code,{children:"my_max"})," function above:"]}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (index : int) (l : list[int]),\n 0 \u2264 index < len(l) \u21d2\n l[index] \u2264 my_max(l)\n"})}),"\n",(0,t.jsxs)(n.p,{children:["It says that for all integer ",(0,t.jsx)(n.code,{children:"index"})," and list of integers ",(0,t.jsx)(n.code,{children:"l"}),", if the index is valid (between\xa0",(0,t.jsx)(n.code,{children:"0"})," and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute."]}),"\n",(0,t.jsxs)(n.p,{children:["To verify this property for all possible list\xa0",(0,t.jsx)(n.code,{children:"l"}),", we reason by induction. A non-empty list is either:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsx)(n.li,{children:"a list with one element, where the maximum is the only element, or"}),"\n",(0,t.jsx)(n.li,{children:"a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list."}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"At the start of the code, we will always have:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"def my_max(l):\n m = l[0]\n"})}),"\n",(0,t.jsxs)(n.p,{children:["with ",(0,t.jsx)(n.code,{children:"m"})," being equal to the first item of the list. Then:"]}),"\n",(0,t.jsxs)(n.ul,{children:["\n",(0,t.jsxs)(n.li,{children:["If the list has only one element, we iterate only once in the ",(0,t.jsx)(n.code,{children:"for"})," loop, with ",(0,t.jsx)(n.code,{children:"x"})," equal to ",(0,t.jsx)(n.code,{children:"l[0]"}),". The condition:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n"})}),"\n","is then equivalent to:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if l[0] > l[0]:\n"})}),"\n","and is always false. We then return ",(0,t.jsx)(n.code,{children:"m = l[0]"}),", which is the only element of the list, and it verifies our property as:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[0] \u2264 l[0]\n"})}),"\n"]}),"\n",(0,t.jsxs)(n.li,{children:["If the list has at least two elements, we unroll the code execution of the ",(0,t.jsx)(n.code,{children:"for"})," loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"l[index] \u2264 m\n"})}),"\n","for all ",(0,t.jsx)(n.code,{children:"index"})," between ",(0,t.jsx)(n.code,{children:"0"})," and ",(0,t.jsx)(n.code,{children:"len(l) - 2"}),". When we reach the last element, we have:","\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"if x > m:\n m = x\n"})}),"\n","with ",(0,t.jsx)(n.code,{children:"x"})," being ",(0,t.jsx)(n.code,{children:"l[len(l) - 1]"}),". There are two possibilities. Either ",(0,t.jsx)(n.em,{children:"(i)"})," ",(0,t.jsx)(n.code,{children:"x"})," is less than or equal to ",(0,t.jsx)(n.code,{children:"m"}),", and we do not update ",(0,t.jsx)(n.code,{children:"m"}),", or ",(0,t.jsx)(n.em,{children:"(ii)"})," ",(0,t.jsx)(n.code,{children:"x"})," is greater than ",(0,t.jsx)(n.code,{children:"m"}),", and we update ",(0,t.jsx)(n.code,{children:"m"})," to ",(0,t.jsx)(n.code,{children:"x"}),". In both cases, the property is verified for the last element of the list, as:","\n",(0,t.jsxs)(n.ol,{children:["\n",(0,t.jsxs)(n.li,{children:["In the first case, ",(0,t.jsx)(n.code,{children:"m"})," stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last\xa0",(0,t.jsx)(n.code,{children:"if"})," statement."]}),"\n",(0,t.jsxs)(n.li,{children:["In the second case, ",(0,t.jsx)(n.code,{children:"m"})," is updated to ",(0,t.jsx)(n.code,{children:"x"}),", which is the last element of the list and a greater value than the original\xa0",(0,t.jsx)(n.code,{children:"m"}),". Then it means that ",(0,t.jsx)(n.code,{children:"m"})," is still larger or equal to all the elements of the list except the last one, being larger that the original\xa0",(0,t.jsx)(n.code,{children:"m"}),", and larger or equal to the last one as it is in fact equals to the last one."]}),"\n"]}),"\n"]}),"\n"]}),"\n",(0,t.jsx)(n.p,{children:"We have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code."}),"\n",(0,t.jsxs)(n.p,{children:["In practice, with formal verification, the reasoning above is done in a proof assistance such as ",(0,t.jsx)(n.a,{href:"https://coq.inria.fr/",children:"Coq"})," to help making sure that we did not forget any case, and automatically solve simple cases for us. Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details."]}),"\n",(0,t.jsx)(n.h2,{id:"completing-the-property",children:"Completing the property"}),"\n",(0,t.jsx)(n.p,{children:"An additional property that we did not verify is:"}),"\n",(0,t.jsx)(n.pre,{children:(0,t.jsx)(n.code,{className:"language-python",children:"forall (l : list[int]),\n exists (index : int),\n 0 \u2264 index < len(l) and\n l[index] = my_max(l)\n"})}),"\n",(0,t.jsx)(n.p,{children:"It says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise."}),"\n",(0,t.jsx)(n.admonition,{title:"For more",type:"info",children:(0,t.jsxs)(n.p,{children:["If you want to go into more details for the formal verification of Python programs, you can look at our ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-python",children:"coq-of-python"})," project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-rust",children:"Rust"})," and other languages like ",(0,t.jsx)(n.a,{href:"https://github.com/formal-land/coq-of-ocaml",children:"OCaml"}),". Contact us at\xa0",(0,t.jsx)(n.a,{href:"mailto:contact@formal.land",children:"contact@formal.land"})," to discuss!"]})}),"\n",(0,t.jsx)(n.h2,{id:"conclusion",children:"Conclusion"}),"\n",(0,t.jsxs)(n.p,{children:["We have presented here the idea of ",(0,t.jsx)(n.strong,{children:"formal verification"}),", a technique to verify the absence of bugs in a program by reasoning from ",(0,t.jsx)(n.strong,{children:"first principles"}),". We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct ",(0,t.jsx)(n.strong,{children:"for all possible lists of integers"}),"."]}),"\n",(0,t.jsx)(n.p,{children:"We will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and to tell us what subjects you want to see covered!"})]})}function d(e={}){const{wrapper:n}={...(0,s.R)(),...e.components};return n?(0,t.jsx)(n,{...e,children:(0,t.jsx)(h,{...e})}):h(e)}},8453:(e,n,i)=>{i.d(n,{R:()=>r,x:()=>a});var t=i(6540);const s={},o=t.createContext(s);function r(e){const n=t.useContext(o);return t.useMemo((function(){return"function"==typeof e?e(n):{...n,...e}}),[n,e])}function a(e){let n;return n=e.disableParentContext?"function"==typeof e.components?e.components(s):e.components||s:r(e.components),t.createElement(o.Provider,{value:n},e.children)}}}]); \ No newline at end of file diff --git a/assets/js/b2f554cd.b4593d13.js b/assets/js/b2f554cd.b4593d13.js deleted file mode 100644 index 0a3253ab..00000000 --- a/assets/js/b2f554cd.b4593d13.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkformal_land=self.webpackChunkformal_land||[]).push([[5894],{6042:e=>{e.exports=JSON.parse('{"blogPosts":[{"id":"/2024/06/05/formal-verification-for-software-correctness","metadata":{"permalink":"/blog/2024/06/05/formal-verification-for-software-correctness","source":"@site/blog/2024-06-05-formal-verification-for-software-correctness.md","title":"\ud83e\udd84 Software correctness from first principles","description":"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.","date":"2024-06-05T00:00:00.000Z","formattedDate":"June 5, 2024","tags":[{"label":"formal verification","permalink":"/blog/tags/formal-verification"},{"label":"software correctness","permalink":"/blog/tags/software-correctness"},{"label":"first principles","permalink":"/blog/tags/first-principles"},{"label":"example","permalink":"/blog/tags/example"},{"label":"Python","permalink":"/blog/tags/python"}],"readingTime":7.265,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd84 Software correctness from first principles","tags":["formal verification","software correctness","first principles","example","Python"],"authors":[]},"unlisted":false,"nextItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/software-correctness-from-first-principles"}},"content":"**Formal verification** is a technique to verify the absence of bugs in a program by reasoning from **first principles**. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.\\n\\nWe will present this idea in detail and illustrate how it works for a very simple example.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Use of formal verification\\n\\nWe typically use formal verification for critical applications, where either:\\n\\n- life is at stake, like in the case of trains, airplanes, medical devices, or\\n- money is at stake, like in the case of financial applications.\\n\\nWith formal verification, in theory, **we can guarantee that the software will never fail**, as we can check **all possible cases** for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions.\\n\\nIn this research paper [Finding and Understanding Bugs in C Compilers](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf), no bugs were found in the middle-end of the formally verified [CompCert](https://en.wikipedia.org/wiki/CompCert) C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!\\n\\n## Definition of programming languages\\n\\nTo be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. For example, in Python, the `if` statement is [defined in the reference manual](https://docs.python.org/3/reference/compound_stmts.html#if) by:\\n\\n```python\\nif_stmt ::= \\"if\\" assignment_expression \\":\\" suite\\n (\\"elif\\" assignment_expression \\":\\" suite)*\\n [\\"else\\" \\":\\" suite]\\n```\\n> It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed.\\n\\nThis means that the Python code:\\n\\n```python\\nif condition:\\n a\\nelse:\\n b\\n```\\n\\nwill execute `a` when the `condition` is true, and `b` otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...).\\n\\nTo make these rules more manageable, we generally split them into two parts:\\n\\n- The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the [grammar](https://docs.python.org/3/reference/grammar.html).\\n- The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the `if` statement.\\n\\nIn formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating \\"syntax errors\\" in case of ill-formed programs.\\n\\n## Example to verify\\n\\nWe consider this short Python example of a function returning the maximum number in a list:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n for x in l:\\n if x > m:\\n m = x\\n return m\\n```\\n\\nWe assume that the list `l` is not empty and only contains integers. If we run it on a few examples:\\n\\n```python\\nmy_max([1, 2, 3]) # => 3\\nmy_max([3, 2, 1]) # => 3\\nmy_max([1, 3, 2]) # => 3\\n```\\n\\nit always returns `3`, the biggest number in the list! But can we make sure this is always the case?\\n\\nWe can certainly not run `my_max` on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning.\\n\\n## Formal verification\\n\\nHere is a general specification that we give of the `my_max` function above:\\n\\n```python\\nforall (index : int) (l : list[int]),\\n 0 \u2264 index < len(l) \u21d2\\n l[index] \u2264 my_max(l)\\n```\\n\\nIt says that for all integer `index` and list of integers `l`, if the index is valid (between `0` and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute.\\n\\nTo verify this property for all possible list `l`, we reason by induction. A non-empty list is either:\\n\\n- a list with one element, where the maximum is the only element, or\\n- a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list.\\n\\nAt the start of the code, we will always have:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n```\\n\\nwith `m` being equal to the first item of the list. Then:\\n\\n- If the list has only one element, we iterate only once in the `for` loop, with `x` equal to `l[0]`. The condition:\\n ```python\\n if x > m:\\n ```\\n is then equivalent to:\\n ```python\\n if l[0] > l[0]:\\n ```\\n and is always false. We then return `m = l[0]`, which is the only element of the list, and it verifies our property as:\\n ```python\\n l[0] \u2264 l[0]\\n ```\\n- If the list has at least two elements, we unroll the code execution of the `for` loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:\\n ```python\\n l[index] \u2264 m\\n ```\\n for all `index` between `0` and `len(l) - 2`. When we reach the last element, we have:\\n ```python\\n if x > m:\\n m = x\\n ```\\n with `x` being `l[len(l) - 1]`. There are two possibilities. Either *(i)* `x` is less than or equal to `m`, and we do not update `m`, or *(ii)* `x` is greater than `m`, and we update `m` to `x`. In both cases, the property is verified for the last element of the list, as:\\n 1. In the first case, `m` stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last `if` statement.\\n 2. In the second case, `m` is updated to `x`, which is the last element of the list and a greater value than the original `m`. Then it means that `m` is still larger or equal to all the elements of the list except the last one, being larger that the original `m`, and larger or equal to the last one as it is in fact equals to the last one.\\n\\nWe have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code.\\n\\nIn practice, with formal verification, the reasoning above is done in a proof assistance such as [Coq](https://coq.inria.fr/) to help making sure that we did not forget any case, and automatically solve simple cases for us. Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, and allows third-party persons to check it without reading all the details.\\n\\n## Completing the property\\n\\nAn additional property that we did not verify is:\\n\\n```python\\nforall (l : list[int]),\\n exists (index : int),\\n 0 \u2264 index < len(l) and\\n l[index] = my_max(l)\\n```\\n\\nIt says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise.\\n\\n:::info For more\\n\\nIf you want to go into more details for the formal verification of Python programs, you can look at our [coq-of-python](https://github.com/formal-land/coq-of-python) project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for [Rust](https://github.com/formal-land/coq-of-rust) and other languages like [OCaml](https://github.com/formal-land/coq-of-ocaml). Contact us at [contact@formal.land](mailto:contact@formal.land) to discuss!\\n\\n:::\\n\\n## Conclusion\\n\\nWe have presented here the idea of **formal verification**, a technique to verify the absence of bugs in a program by reasoning from **first principles**. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct **for all possible lists of integers**.\\n\\nWe will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!"},{"id":"/2024/06/05/software-correctness-from-first-principles","metadata":{"permalink":"/blog/2024/06/05/software-correctness-from-first-principles","source":"@site/blog/2024-06-05-software-correctness-from-first-principles.md","title":"\ud83e\udd84 Software correctness from first principles","description":"Formal verification is a technique to verify the absence of bugs in a program by reasoning from first principles. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.","date":"2024-06-05T00:00:00.000Z","formattedDate":"June 5, 2024","tags":[{"label":"formal verification","permalink":"/blog/tags/formal-verification"},{"label":"software correctness","permalink":"/blog/tags/software-correctness"},{"label":"first principles","permalink":"/blog/tags/first-principles"},{"label":"example","permalink":"/blog/tags/example"},{"label":"Python","permalink":"/blog/tags/python"}],"readingTime":7.26,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83e\udd84 Software correctness from first principles","tags":["formal verification","software correctness","first principles","example","Python"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/formal-verification-for-software-correctness"},"nextItem":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace"}},"content":"**Formal verification** is a technique to verify the absence of bugs in a program by reasoning from **first principles**. Instead of testing a program on examples, what can only cover a finite number of cases, formal verification checks all possible cases. It does so by going back to the definition of programming languages, showing why the whole code is correct given how each individual keyword behaves.\\n\\nWe will present this idea in detail and illustrate how it works for a very simple example.\\n\\n\x3c!-- truncate --\x3e\\n\\n## Use of formal verification\\n\\nWe typically use formal verification for critical applications, where either:\\n\\n- life is at stake, like in the case of trains, airplanes, medical devices, or\\n- money is at stake, like in the case of financial applications.\\n\\nWith formal verification, in theory, **we can guarantee that the software will never fail**, as we can check **all possible cases** for a given property. A property can be that no non-admin users can read sensitive data, or that a program never fails with uncaught exceptions.\\n\\nIn this research paper [Finding and Understanding Bugs in C Compilers](https://users.cs.utah.edu/~regehr/papers/pldi11-preprint.pdf), no bugs were found in the middle-end of the formally verified [CompCert](https://en.wikipedia.org/wiki/CompCert) C compiler, while the other C compilers (GCC, LLVM, ...) all contained subtle bugs. This illustrates that formal verification can be an effective way to make complex software with zero bugs!\\n\\n## Definition of programming languages\\n\\nTo be able to reason on a program we go back to the definition of a programming language. These languages (C, JavaScript, Python, ...) are generally defined with a precise set of rules. For example, in Python, the `if` statement is [defined in the reference manual](https://docs.python.org/3/reference/compound_stmts.html#if) by:\\n\\n```python\\nif_stmt ::= \\"if\\" assignment_expression \\":\\" suite\\n (\\"elif\\" assignment_expression \\":\\" suite)*\\n [\\"else\\" \\":\\" suite]\\n```\\n> It selects exactly one of the suites by evaluating the expressions one by one until one is found to be true (see section Boolean operations for the definition of true and false); then that suite is executed (and no other part of the if statement is executed or evaluated). If all expressions are false, the suite of the else clause, if present, is executed.\\n\\nThis means that the Python code:\\n\\n```python\\nif condition:\\n a\\nelse:\\n b\\n```\\n\\nwill execute `a` when the `condition` is true, and `b` otherwise. There are similar rules for all other program constructs (loops, function definitions, classes, ...).\\n\\nTo make these rules more manageable, we generally split them into two parts:\\n\\n- The syntax part, that defines what is a valid program in the language. For example, in Python, the syntax is defined by the [grammar](https://docs.python.org/3/reference/grammar.html).\\n- The semantics part, that defines what a program does. This is what we have seen above with the description of the behavior of the `if` statement.\\n\\nIn formal verification, we will focus on the semantics of programs, assuming that the syntax is already verified by the compiler or interpreter, generating \\"syntax errors\\" in case of ill-formed programs.\\n\\n## Example to verify\\n\\nWe consider this short Python example of a function returning the maximum number in a list:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n for x in l:\\n if x > m:\\n m = x\\n return m\\n```\\n\\nWe assume that the list `l` is not empty and only contains integers. If we run it on a few examples:\\n\\n```python\\nmy_max([1, 2, 3]) # => 3\\nmy_max([3, 2, 1]) # => 3\\nmy_max([1, 3, 2]) # => 3\\n```\\n\\nit always returns `3`, the biggest number in the list! But can we make sure this is always the case?\\n\\nWe can certainly not run `my_max` on all possible lists of integers, as there are infinitely many of them. We need to reason from the definition of the Python language, which is what we call formal verification reasoning.\\n\\n## Formal verification\\n\\nHere is a general specification that we give of the `my_max` function above:\\n\\n```python\\nforall (index : int) (l : list[int]),\\n 0 \u2264 index < len(l) \u21d2\\n l[index] \u2264 my_max(l)\\n```\\n\\nIt says that for all integer `index` and list of integers `l`, if the index is valid (between `0` and the length of the list), then the element at this index is less than or equal to the maximum of the list that we compute.\\n\\nTo verify this property for all possible list `l`, we reason by induction. A non-empty list is either:\\n\\n- a list with one element, where the maximum is the only element, or\\n- a list with at least two elements, where the maximum is either the last element or the maximum of the rest of the list.\\n\\nAt the start of the code, we will always have:\\n\\n```python\\ndef my_max(l):\\n m = l[0]\\n```\\n\\nwith `m` being equal to the first item of the list. Then:\\n\\n- If the list has only one element, we iterate only once in the `for` loop, with `x` equal to `l[0]`. The condition:\\n ```python\\n if x > m:\\n ```\\n is then equivalent to:\\n ```python\\n if l[0] > l[0]:\\n ```\\n and is always false. We then return `m = l[0]`, which is the only element of the list, and it verifies our property as:\\n ```python\\n l[0] \u2264 l[0]\\n ```\\n- If the list has at least two elements, we unroll the code execution of the `for` loop and iterate over all the elements until the last one. Our induction hypothesis tells us that the property we verify is true for the first part of the list, excluding the last element. This means that:\\n ```python\\n l[index] \u2264 m\\n ```\\n for all `index` between `0` and `len(l) - 2`. When we reach the last element, we have:\\n ```python\\n if x > m:\\n m = x\\n ```\\n with `x` being `l[len(l) - 1]`. There are two possibilities. Either *(i)* `x` is less than or equal to `m`, and we do not update `m`, or *(ii)* `x` is greater than `m`, and we update `m` to `x`. In both cases, the property is verified for the last element of the list, as:\\n 1. In the first case, `m` stays the same, so it is still larger or equal to all the elements of the list except the last one, as well as larger or equal to the last one according to this last `if` statement.\\n 2. In the second case, `m` is updated to `x`, which is the last element of the list and a greater value than the original `m`. Then it means that `m` is still larger or equal to all the elements of the list except the last one, being larger that the original `m`, and larger or equal to the last one as it is in fact equals to the last one.\\n\\nWe have now closed our induction proof and verified that our property is true for all possible lists of integers! The reasoning above is rather verbose but should actually correspond to the intuition of most programmers when reading this code.\\n\\nIn practice, with formal verification, the reasoning above is done in a proof assistance such as [Coq](https://coq.inria.fr/) to help making sure that we did not forget any case and add automation for most simple cases. Having a proof written in a proof language like Coq also allows us to re-run it to check that it is still valid after a change in the code, or some third-party person to check it without reading all the details.\\n\\n## Completing the property\\n\\nAn additional property that we did not verify is:\\n\\n```python\\nforall (l : list[int]),\\n exists (index : int),\\n 0 \u2264 index < len(l) and\\n l[index] = my_max(l)\\n```\\n\\nIt says that the maximum of the list is actually in the list. We can verify it by induction in the same way as we did for the first property. You can detail this verification as an exercise.\\n\\n:::info Contact\\n\\nIf you want to go into more details for the formal verification of Python programs, you can look at our [coq-of-python](https://github.com/formal-land/coq-of-python) project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for [Rust](https://github.com/formal-land/coq-of-rust) and other languages like [OCaml](https://github.com/formal-land/coq-of-ocaml). Contact us at [contact@formal.land](mailto:contact@formal.land) to discuss!\\n\\n:::\\n\\n## Conclusion\\n\\nWe have presented here the idea of **formal verification**, a technique to verify the absence of bugs in a program by reasoning from **first principles**. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct **for all possible lists of integers**.\\n\\nWe will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!"},{"id":"/2024/05/22/translation-of-python-code-simulations-from-trace","metadata":{"permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace","source":"@site/blog/2024-05-22-translation-of-python-code-simulations-from-trace.md","title":"\ud83d\udc0d Simulation of Python code from traces in Coq","description":"In order to formally verify Python code in Coq our approach is the following:","date":"2024-05-22T00:00:00.000Z","formattedDate":"May 22, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"},{"label":"simulation","permalink":"/blog/tags/simulation"},{"label":"trace","permalink":"/blog/tags/trace"}],"readingTime":8.59,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum","simulation","trace"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83e\udd84 Software correctness from first principles","permalink":"/blog/2024/06/05/software-correctness-from-first-principles"},"nextItem":{"title":"\ud83d\udc0d Simulation of Python code in Coq","permalink":"/blog/2024/05/14/translation-of-python-code-simulations"}},"content":"In order to formally verify Python code in Coq our approach is the following:\\n\\n1. Import Python code in Coq by running [coq-of-python](https://github.com/formal-land/coq-of-python).\\n2. Write a purely functional simulation in Coq of the code.\\n3. Show that this simulation is equivalent to the translation.\\n4. Verify the simulation.\\n\\nWe will show in this article how we can merge the steps 2. and 3. to save time in the verification process. We do so by relying on the proof mode of Coq and unification.\\n\\nOur mid-term goal is to formally specify the [Ethereum Virtual Machine](https://ethereum.org/en/developers/docs/evm/) (EVM) and prove that this specification is correct according to [reference implementation of the EVM](https://github.com/ethereum/execution-specs) in Python. This would ensure that it is always up-to-date and exhaustive. The code of this project is open-source and available on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python).\\n\\n\x3c!-- truncate --\x3e\\n\\n\\n\\n## Our Python\'s monad \ud83d\udc0d\\n\\nWe put the Python code that we import in Coq in a monad `M` to represent all the features that are hard to express in Coq, mainly the side effects. This monad is a combination of two levels:\\n\\n- `LowM` for the side effects except the control flow.\\n- `M` that adds an error monad on top of `LowM` to handle the control flow (exceptions, `break` instruction, ...).\\n\\n### LowM\\n\\nHere is the definition of the `LowM` monad in [CoqOfPython.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/CoqOfPython.v):\\n\\n```coq\\nModule Primitive.\\n Inductive t : Set -> Set :=\\n | StateAlloc (object : Object.t Value.t) : t (Pointer.t Value.t)\\n | StateRead (mutable : Pointer.Mutable.t Value.t) : t (Object.t Value.t)\\n | StateWrite (mutable : Pointer.Mutable.t Value.t) (update : Object.t Value.t) : t unit\\n | GetInGlobals (globals : Globals.t) (name : string) : t Value.t.\\nEnd Primitive.\\n\\nModule LowM.\\n Inductive t (A : Set) : Set :=\\n | Pure (a : A)\\n | CallPrimitive {B : Set} (primitive : Primitive.t B) (k : B -> t A)\\n | CallClosure {B : Set} (closure : Data.t Value.t) (args kwargs : Value.t) (k : B -> t A)\\n | Impossible.\\n Arguments Pure {_}.\\n Arguments CallPrimitive {_ _}.\\n Arguments CallClosure {_ _}.\\n Arguments Impossible {_}.\\n\\n Fixpoint bind {A B : Set} (e1 : t A) (e2 : A -> t B) : t B :=\\n match e1 with\\n | Pure a => e2 a\\n | CallPrimitive primitive k => CallPrimitive primitive (fun v => bind (k v) e2)\\n | CallClosure closure args kwargs k => CallClosure closure args kwargs (fun a => bind (k a) e2)\\n | Impossible => Impossible\\n end.\\nEnd LowM.\\n```\\n\\nThis is a monad defined by continuation (the variable `k`):\\n\\n- We terminate a computation with the primitive `Pure` and some result `a`, that can be any purely functional expression.\\n- We can call some primitives grouped in `Primitive.t` that are side effects:\\n - `StateAlloc` to allocate a new object in the memory,\\n - `StateRead` to read an object from the memory,\\n - `StateWrite` to write an object in the memory,\\n - `GetInGlobals` to read a global variable, doing name resolution. This is a side effects as function definitions in Python do not need to be ordered.\\n- We can call a closure (an anonymous function) with `CallClosure`. This is required for termination, as we cannot define an eval function on the type of Python values since some do not terminate like the [\u03a9 expression](https://medium.com/@dkeout/why-you-must-actually-understand-the-%CF%89-and-y-combinators-c9204241da7a). See our previous post [Translation of Python code to Coq](/blog/2024/05/10/translation-of-python-code) for our definition of Python values. The combinator `CallClosure` is also very convenient to modularize our proofs: we reason on each closure independently.\\n- We can mark a code path as unreachable with `Impossible`.\\n\\n### M\\n\\nThe final monad `M` is defined as:\\n\\n```coq\\nDefinition M : Set :=\\n LowM.t (Value.t + Exception.t).\\n```\\n\\nIt has no parameters as Python is untyped, so all expressions have the same result type:\\n\\n- either a success value of type `Value.t`,\\n- or an exception of type `Exception.t`, with some special cases to represent a `return`, a `break`, or a `continue` instruction.\\n\\nWe define the monadic bind of `M` like for the error monad:\\n\\n```coq\\nDefinition bind (e1 : M) (e2 : Value.t -> M) : M :=\\n LowM.bind e1 (fun v => match v with\\n | inl v => e2 v\\n | inr e => LowM.Pure (inr e)\\n end).\\n```\\n\\n## Traces \ud83d\udc3e\\n\\nWe define our semantics of a computation `e` of type `M` in [simulations/proofs/CoqOfPython.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/simulations/proofs/CoqOfPython.v) with the predicate:\\n\\n```coq\\n{{ stack, heap | e \u21d3 to_value | P_stack, P_heap }}\\n```\\n\\nthat we call a _run_ or a _trace_, saying that:\\n\\n- starting from the initial state `stack`, `heap`,\\n- the computation `e` terminates with a value,\\n- that is in the image of the function `to_value`,\\n- and with a final stack and heap that satisfy the predicates `P_stack` and `P_heap`.\\n\\nNote that we do not explicit the resulting value and memory state of a computation in this predicate. We only say that it exists and verifies a few properties, that are here for compositionality. We have a purely functional function `evaluate` that can derive the result of a run of a computation:\\n\\n```coq\\nevaluate :\\n forall `{Heap.Trait} {A B : Set}\\n {stack : Stack.t} {heap : Heap} {e : LowM.t B}\\n {to_value : A -> B} {P_stack : Stack.t -> Prop} {P_heap : Heap -> Prop}\\n (run : {{ stack, heap | e \u21d3 to_value | P_stack, P_heap }}),\\n A * { stack : Stack.t | P_stack stack } * { heap : Heap | P_heap heap }\\n```\\n\\nThe function `evaluate` is defined in Coq by a `Fixpoint`. Its result is what we call a _simulation_, which is a purely functional definition equivalent to the orignal computation `e` from Python. It is equivalent by construction.\\n\\n## Building a trace \ud83d\udd28\\n\\nA trace is an inductive in `Set` that we can build with the following constructors:\\n\\n```coq\\nInductive t `{Heap.Trait} {A B : Set}\\n (stack : Stack.t) (heap : Heap)\\n (to_value : A -> B) (P_stack : Stack.t -> Prop) (P_heap : Heap -> Prop) :\\n LowM.t B -> Set :=\\n(* [Pure] primitive *)\\n| Pure\\n (result : A)\\n (result\' : B) :\\n result\' = to_value result ->\\n P_stack stack ->\\n P_heap heap ->\\n {{ stack, heap |\\n LowM.Pure result\' \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* [StateRead] primitive *)\\n| CallPrimitiveStateRead\\n (mutable : Pointer.Mutable.t Value.t)\\n (object : Object.t Value.t)\\n (k : Object.t Value.t -> LowM.t B) :\\n IsRead.t stack heap mutable object ->\\n {{ stack, heap |\\n k object \u21d3\\n to_value\\n | P_stack, P_heap }} ->\\n {{ stack, heap |\\n LowM.CallPrimitive (Primitive.StateRead mutable) k \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* [CallClosure] primitive *)\\n| CallClosure {C : Set}\\n (f : Value.t -> Value.t -> M)\\n (args kwargs : Value.t)\\n (to_value_inter : C -> Value.t + Exception.t)\\n (P_stack_inter : Stack.t -> Prop) (P_heap_inter : Heap -> Prop)\\n (k : Value.t + Exception.t -> LowM.t B) :\\n let closure := Data.Closure f in\\n {{ stack, heap |\\n f args kwargs \u21d3\\n to_value_inter\\n | P_stack_inter, P_heap_inter }} ->\\n (* We quantify over every possible values as we cannot compute the result of the closure here.\\n We only know that it exists and respects some constraints in this inductive definition. *)\\n (forall value_inter stack_inter heap_inter,\\n P_stack_inter stack_inter ->\\n P_heap_inter heap_inter ->\\n {{ stack_inter, heap_inter |\\n k (to_value_inter value_inter) \u21d3\\n to_value\\n | P_stack, P_heap }}\\n ) ->\\n {{ stack, heap |\\n LowM.CallClosure closure args kwargs k \u21d3\\n to_value\\n | P_stack, P_heap }}\\n(* ...cases for the other primitives of the monad... *)\\n```\\n\\n### Pure\\n\\nIn the `Pure` case we return the final result of the computation. We check the state fulfills the predicate `P_stack` and `P_heap`, and that the result is the image by the function `to_value` of some `result`.\\n\\n### CallPrimitiveStateRead\\n\\nTo read a value in memory, we rely on another predicate `IsRead` that checks if the `mutable` pointer is valid in the `stack` or `heap` and that the `object` is the value at this pointer. We then call the continuation `k` with this object. We have similar rules for allocating a new object in memory and writing at a pointer.\\n\\nNote that we parameterize all our semantics by `` `{Heap.Trait}`` that provides a specific `Heap` type with read and write primitives. We can choose the implementation of the memory model that we want to use in our simulations in order to simplify the reasoning.\\n\\n### CallClosure\\n\\nTo call a closure, we first evaluate the closure with the arguments and keyword arguments. We then call the continuation `k` with the result of the closure. We quantify over all possible results of the closure, as we cannot compute it here. This would require to be able to define `Fixpoint` together with `Inductive`, which is not possible in Coq. So we only know that the result of the closure exists, and can use the constraints on its result (the function `to_value` and the predicates `P_stack_inter` and `P_heap_inter`) to build a run of the continuation.\\n\\nThe other constructors are not presented here but are similar to the above. We will also add a monadic primitive for loops with the following idea: we show that a loop terminates by building a trace, as traces are `Inductive` so must be finite. We have no rules for the `Impossible` case so that building the trace of a computation also shows that the `Impossible` calls are in unreachable paths.\\n\\n## Example \ud83d\udd0d\\n\\nWe have applied these technique to a small code example with allocation, memory read, and closure call primitives. We were able to show that the resulting simulation obtained by running `evaluate` on the trace is equal to a simulation written by hand. The proof was just the tactic `reflexivity`. We believe that we can automate most of the tactics used to build a run, except for the allocations were the user needs to make a choice (immediate, stack, or heap allocation, which address, ...).\\n\\nTo continue our experiments we now need to complete our semantics of Python, especially to take into account method and operator calls.\\n\\n## Conclusion\\n\\nWe have presented an alternative way to build simulations of imperative Python code in purely functional Coq code. The idea is to enable faster reasoning over Python code by removing the need to build explicit simulations. We plan to port this technique to other tools like [coq-of-rust](https://github.com/formal-land/coq-of-rust) as well.\\n\\nTo see what we can do for you talk with us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7. For our previous projects, see our [formal verification of the Tezos\' L1](https://formal-land.gitlab.io/coq-tezos-of-ocaml/)!"},{"id":"/2024/05/14/translation-of-python-code-simulations","metadata":{"permalink":"/blog/2024/05/14/translation-of-python-code-simulations","source":"@site/blog/2024-05-14-translation-of-python-code-simulations.md","title":"\ud83d\udc0d Simulation of Python code in Coq","description":"We are continuing to specify the Ethereum Virtual Machine (EVM) in the formal verification language Coq. We are working from the automatic translation in Coq of the reference implementation of the EVM, which is written in the language Python.","date":"2024-05-14T00:00:00.000Z","formattedDate":"May 14, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"}],"readingTime":6.63,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Simulation of Python code in Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Simulation of Python code from traces in Coq","permalink":"/blog/2024/05/22/translation-of-python-code-simulations-from-trace"},"nextItem":{"title":"\ud83d\udc0d Translation of Python code to Coq","permalink":"/blog/2024/05/10/translation-of-python-code"}},"content":"We are continuing to specify the [Ethereum Virtual Machine](https://ethereum.org/en/developers/docs/evm/) (EVM) in the formal verification language [Coq](https://coq.inria.fr/). We are working from the [automatic translation in Coq](https://github.com/formal-land/coq-of-python/tree/main/CoqOfPython/ethereum) of the [reference implementation of the EVM](https://github.com/ethereum/execution-specs), which is written in the language [Python](https://www.python.org/).\\n\\nIn this article, we will see how we specify the EVM in Coq by writing an interpreter that closely mimics the behavior of the Python code. We call that implementation a _simulation_ as it aims to reproduce the behavior of the Python code, the reference.\\n\\nIn contrast to the automatic translation from Python, the simulation is a manual translation written in idiomatic Coq. We expect it to be ten times smaller in lines compared to the automatic translation, and of about the same size as the Python code. This is because the automatic translation needs to encode all the Python specific features in Coq, like variable mutations and the class system.\\n\\nIn the following article, we will show how we can prove that the simulation is correct, meaning that it behaves exactly as the automatic translation.\\n\\nThe code of this project is open-source and available on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python). This work follows a call from [Vitalik Buterin](https://en.wikipedia.org/wiki/Vitalik_Buterin) for more formal verification of the Ethereum\'s code.\\n\\n\x3c!-- truncate --\x3e\\n\\n\\n\\n## The `add` function \ud83e\uddee\\n\\nWe focus on a simulation for the `add` function in [vm/instructions/arithmetic.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/paris/vm/instructions/arithmetic.py) that implements the addition primitive of the EVM. The Python code is:\\n\\n```python\\ndef add(evm: Evm) -> None:\\n \\"\\"\\"\\n Adds the top two elements of the stack together, and pushes the result back\\n on the stack.\\n\\n Parameters\\n ----------\\n evm :\\n The current EVM frame.\\n\\n \\"\\"\\"\\n # STACK\\n x = pop(evm.stack)\\n y = pop(evm.stack)\\n\\n # GAS\\n charge_gas(evm, GAS_VERY_LOW)\\n\\n # OPERATION\\n result = x.wrapping_add(y)\\n\\n push(evm.stack, result)\\n\\n # PROGRAM COUNTER\\n evm.pc += 1\\n```\\n\\nMost of the functions of the interpreter are written in this style. They take the global state of the interpreter, called `Evm` as input, and mutate it with the effect of the current instruction.\\n\\nThe `Evm` structure is defined as:\\n\\n```python\\n@dataclass\\nclass Evm:\\n \\"\\"\\"The internal state of the virtual machine.\\"\\"\\"\\n\\n pc: Uint\\n stack: List[U256]\\n memory: bytearray\\n code: Bytes\\n gas_left: Uint\\n env: Environment\\n valid_jump_destinations: Set[Uint]\\n logs: Tuple[Log, ...]\\n refund_counter: int\\n running: bool\\n message: Message\\n output: Bytes\\n accounts_to_delete: Set[Address]\\n touched_accounts: Set[Address]\\n return_data: Bytes\\n error: Optional[Exception]\\n accessed_addresses: Set[Address]\\n accessed_storage_keys: Set[Tuple[Address, Bytes32]]\\n```\\n\\nIt contains the current instruction pointer `pc`, the stack of the EVM, the memory, the code, the gas left, ...\\n\\nAs the EVM is a stack-based machine, the addition function does the following:\\n\\n1. It pops the two top elements of the stack `x` and `y`,\\n2. It charges a very low amount of gas,\\n3. It computes the result of the addition `result = x + y`,\\n4. It pushes the result back on the stack,\\n5. It increments the program counter `pc`.\\n\\nNote that all these operations might fail and raise an exception, for example,if the stack is empty when we pop `x`and `y` at the beginning.\\n\\n## Monad for the simulations \ud83e\uddea\\n\\nThe main side-effects that we want to integrate into the Coq simulations are:\\n\\n- the mutation of the global state `Evm`,\\n- the raising of exceptions.\\n\\nFor that, we use a state and error monad `MS?`:\\n\\n```coq\\nModule StateError.\\n Definition t (State Error A : Set) : Set :=\\n State -> (A + Error) * State.\\n\\n Definition return_ {State Error A : Set}\\n (value : A) :\\n t State Error A :=\\n fun state => (inl value, state).\\n\\n Definition bind {State Error A B : Set}\\n (value : t State Error A)\\n (f : A -> t State Error B) :\\n t State Error B :=\\n fun state =>\\n let (value, state) := value state in\\n match value with\\n | inl value => f value state\\n | inr error => (inr error, state)\\n end.\\nEnd StateError.\\n\\nNotation \\"MS?\\" := StateError.t.\\n```\\n\\nWe parametrize it by an equivalent definition in Coq of the type `Evm` and the type of exceptions that we might raise.\\n\\nIn Python the exceptions are a class that is extended as needed to add new kinds of exceptions. We use a closed sum type in Coq to represent the all possible exceptions that might happen in the EVM interpreter.\\n\\nFor the `Evm` state, some functions might actually only modify a part of it. For example, the `pop` function only modifies the `stack` field. We use a mechanism of [lens](https://medium.com/javascript-scene/lenses-b85976cb0534) to specialize the state monad to only modify a part of the state. For example, the `pop` function has the type:\\n\\n```coq\\npop : MS? (list U256.t) Exception.t U256.t\\n```\\n\\nwhere `list U256.t` is the type of the stack, while the `add` function has type:\\n\\n```coq\\nadd : MS? Evm.t Exception.t unit\\n```\\n\\nWe define a lens for the stack in the `Evm` type with:\\n\\n```coq\\nModule Lens.\\n Record t (Big_A A : Set) : Set := {\\n read : Big_A -> A;\\n write : Big_A -> A -> Big_A\\n }.\\nEnd Lens.\\n\\nModule Evm.\\n Module Lens.\\n Definition stack : Lens.t Evm.t (list U256.t) := {|\\n Lens.read := (* ... *);\\n Lens.write := (* ... *);\\n |}.\\n```\\n\\nWe can then lift the `pop` function to be used in a context where the `Evm` state is modified with:\\n\\n```coq\\nletS? x := StateError.lift_lens Evm.Lens.stack pop in\\n```\\n\\n## Typing discipline \ud83d\udc6e\\n\\nWe keep in Coq all the type names from the Python source code. When a new class is created we create a new Coq type. When the class inherits from another one, we add a field in the Coq type to represent the parent class. Thus we work by composition rather than inheritance.\\n\\nHere is an example of the primitive types defined in [base_types.py](https://github.com/ethereum/execution-specs/blob/master/src/ethereum/base_types.py):\\n\\n```python\\nclass FixedUint(int):\\n MAX_VALUE: ClassVar[\\"FixedUint\\"]\\n\\n # ...\\n\\n def __add__(self: T, right: int) -> T:\\n # ...\\n\\nclass U256(FixedUint):\\n MAX_VALUE = 2**256 - 1\\n\\n # ...\\n```\\n\\nWe simulate it by:\\n\\n```coq\\nModule FixedUint.\\n Record t : Set := {\\n MAX_VALUE : Z;\\n value : Z;\\n }.\\n\\n Definition __add__ (self right_ : t) : M? Exception.t t :=\\n (* ... *).\\nEnd FixedUint.\\n\\nModule U256.\\n Inductive t : Set :=\\n | Make (value : FixedUint.t).\\n\\n Definition of_Z (value : Z) : t :=\\n Make {|\\n FixedUint.MAX_VALUE := 2^256 - 1;\\n FixedUint.value := value;\\n |}.\\n\\n (* ... *)\\nEnd U256.\\n```\\n\\nFor the imports, that are generally written with an explicit list of names:\\n\\n```python\\nfrom ethereum.base_types import U255_CEIL_VALUE, U256, U256_CEIL_VALUE, Uint\\n```\\n\\nwe follow the same pattern in Coq:\\n\\n```coq\\nRequire ethereum.simulations.base_types.\\nDefinition U255_CEIL_VALUE := base_types.U255_CEIL_VALUE.\\nModule U256 := base_types.U256.\\nDefinition U256_CEIL_VALUE := base_types.U256_CEIL_VALUE.\\nModule Uint := base_types.Uint.\\n```\\n\\nThis is a bit more verbose than the usual way in Coq to import a module, but it makes the translation more straightforward.\\n\\n## Final simulation \ud83e\udeb6\\n\\nFinally, our Coq simulation of the `add` function is the following:\\n\\n```coq\\nDefinition add : MS? Evm.t Exception.t unit :=\\n (* STACK *)\\n letS? x := StateError.lift_lens Evm.Lens.stack pop in\\n letS? y := StateError.lift_lens Evm.Lens.stack pop in\\n\\n (* GAS *)\\n letS? _ := charge_gas GAS_VERY_LOW in\\n\\n (* OPERATION *)\\n let result := U256.wrapping_add x y in\\n\\n letS? _ := StateError.lift_lens Evm.Lens.stack (push result) in\\n\\n (* PROGRAM COUNTER *)\\n letS? _ := StateError.lift_lens Evm.Lens.pc (fun pc =>\\n (inl tt, Uint.__add__ pc (Uint.Make 1))) in\\n\\n returnS? tt.\\n```\\n\\nWe believe that it has a size and readability close to the original Python code. You can look at this definition in [vm/instructions/simulations/arithmetic.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/ethereum/paris/vm/instructions/simulations/arithmetic.v). As a reference, the automatic translation is 65 lines long and in [vm/instructions/arithmetic.v](https://github.com/formal-land/coq-of-python/blob/main/CoqOfPython/ethereum/paris/vm/instructions/arithmetic.v).\\n\\n## Conclusion\\n\\nWe have seen how to write a simulation for one example of a Python function. We now need to do it for the rest of the code of the interpreter. We will also see in a following article how to prove that the simulation behaves as the automatic translation of the Python code in Coq.\\n\\nFor our formal verification services, reach us at [contact@formal.land](mailto:contact@formal.land) \ud83c\udfc7! To know more about what we have done, see [our previous project](https://formal-land.gitlab.io/coq-tezos-of-ocaml/) on the verification of the L1 of Tezos."},{"id":"/2024/05/10/translation-of-python-code","metadata":{"permalink":"/blog/2024/05/10/translation-of-python-code","source":"@site/blog/2024-05-10-translation-of-python-code.md","title":"\ud83d\udc0d Translation of Python code to Coq","description":"We are starting to work on a new product, coq-of-python. The idea of this tool is, as you can guess, to translate Python code to the proof system Coq.","date":"2024-05-10T00:00:00.000Z","formattedDate":"May 10, 2024","tags":[{"label":"coq-of-python","permalink":"/blog/tags/coq-of-python"},{"label":"Python","permalink":"/blog/tags/python"},{"label":"Coq","permalink":"/blog/tags/coq"},{"label":"translation","permalink":"/blog/tags/translation"},{"label":"Ethereum","permalink":"/blog/tags/ethereum"}],"readingTime":10.445,"hasTruncateMarker":true,"authors":[],"frontMatter":{"title":"\ud83d\udc0d Translation of Python code to Coq","tags":["coq-of-python","Python","Coq","translation","Ethereum"],"authors":[]},"unlisted":false,"prevItem":{"title":"\ud83d\udc0d Simulation of Python code in Coq","permalink":"/blog/2024/05/14/translation-of-python-code-simulations"},"nextItem":{"title":"\ud83e\udd80 Translation of the Rust\'s core and alloc crates","permalink":"/blog/2024/04/26/translation-core-alloc-crates"}},"content":"We are starting to work on a new product, [coq-of-python](https://github.com/formal-land/coq-of-python). The idea of this tool is, as you can guess, to translate Python code to the [proof system Coq](https://coq.inria.fr/).\\n\\nWe want to import specifications written in Python to a formal system like Coq. In particular, we are interested in the [reference specification](https://github.com/ethereum/execution-specs) of [Ethereum](https://ethereum.org/), which describes how [EVM smart contracts](https://ethereum.org/en/developers/docs/evm/) run. Then, we will be able to use this specification to either formally verify the various implementations of the EVM or smart contracts.\\n\\nAll this effort follows [a Tweet](https://twitter.com/VitalikButerin/status/1759369749887332577) from [Vitalik Buterin](https://en.wikipedia.org/wiki/Vitalik_Buterin) hoping for more formal verification of the Ethereum\'s code:\\n\\n> One application of AI that I am excited about is AI-assisted formal verification of code and bug finding.\\n>\\n> Right now ethereum\'s biggest technical risk probably is bugs in code, and anything that could significantly change the game on that would be amazing.\\n>\\n> — Vitalik Buterin\\n\\nWe will now describe the technical development of `coq-of-python`. For the curious, all the code is on GitHub: [formal-land/coq-of-python](https://github.com/formal-land/coq-of-python).\\n\\n\x3c!-- truncate --\x3e\\n\\n\\n\\n## Reading Python code \ud83d\udcd6\\n\\nA first step we need to do to translate Python code is to read it in a programmatic way. For simplicity and better integration, we chose to write `coq-of-python` in Python.\\n\\nWe use the [ast](https://docs.python.org/3/library/ast.html) module to parse the code and get an abstract syntax tree (AST) of the code. This is a tree representation of the code that we can manipulate in Python. We could have used other representations, such as the Python bytecode, but it seemed too low-level to be understandable by a human.\\n\\nGiven the path to a Python file, we get its AST with the following code:\\n\\n```python\\nimport ast\\n\\ndef read_python_file(path: str) -> ast.Module:\\n with open(path, \\"r\\") as file:\\n return ast.parse(file.read())\\n```\\n\\nThis code is very short, and we benefit from the general elegance of Python. There is no typing or advanced data types in Python, keeping the AST rather small. Here is an extract of it:\\n\\n```\\nexpr = BoolOp(boolop op, expr* values)\\n | NamedExpr(expr target, expr value)\\n | BinOp(expr left, operator op, expr right)\\n | UnaryOp(unaryop op, expr operand)\\n | Lambda(arguments args, expr body)\\n | IfExp(expr test, expr body, expr orelse)\\n | Dict(expr* keys, expr* values)\\n | Set(expr* elts)\\n | ListComp(expr elt, comprehension* generators)\\n | SetComp(expr elt, comprehension* generators)\\n | ... more cases ...\\n```\\n\\nAn expression is described as being of one of several kinds. For example, the application of a binary operator such as:\\n\\n```python\\n1 + 2\\n```\\n\\ncorresponds to the case `BinOp` with `1` as the `left` expression, `+` as the `op` operator, and `2` as the `right` expression.\\n\\n## Outputting Coq code \ud83d\udcdd\\n\\nWe translate each element of the Python\'s AST into a string of Coq code. We keep track of the current indentation level in order to present a nice output. Here is the code to translate the binary operator expressions:\\n\\n```python\\ndef generate_expr(indent, is_with_paren, node: ast.expr):\\n if isinstance(node, ast.BoolOp):\\n ...\\n elif isinstance(node, ast.BinOp):\\n return paren(\\n is_with_paren,\\n generate_operator(node.op) + \\" (|\\\\n\\" +\\n generate_indent(indent + 1) +\\n generate_expr(indent + 1, False, node.left) + \\",\\\\n\\" +\\n generate_indent(indent + 1) +\\n generate_expr(indent + 1, False, node.right) + \\"\\\\n\\" +\\n generate_indent(indent) + \\"|)\\"\\n )\\n elif ...\\n```\\n\\nWe have the current number of indentation levels in the `indent` variable. We use the flag `is_with_paren` to know whether we should add parenthesis around the current expression if it is the sub-expression of another one.\\n\\nWe apply the `node.op` operator on the two parameters `node.left` and `node.right`. For example, the translation of the Python code `1 + 2` will be:\\n\\n```coq\\nBinOp.add (|\\n Constant.int 1,\\n Constant.int 2\\n|)\\n```\\n\\nWe use a special notation `f (| x1, ..., xn |)` to represent a function application in a monadic context. In the next section, we explain why we need this notation.\\n\\n## Monad and values \ud83d\udd2e\\n\\nOne of the difficulties in translating some code to a language such as Coq is that Coq is purely functional. This means that a function can never modify a variable or raise an exception. The non-purely functional actions are called side-effects.\\n\\nTo solve this issue, we represent the side-effects of the Python code in a [monad](If you want to go into more details for the formal verification of Python programs, you can look at our coq-of-python project, where we define the semantics of Python in Coq and verify properties of Python programs (ongoing project!). We also provide formal verification services for Rust and other languages like OCaml. Contact us at contact@formal.land to discuss!
We have presented here the idea of formal verification, a technique to verify the absence of bugs in a program by reasoning from first principles. We have illustrated this idea for a simple Python example, showing how we can verify that a function computing the maximum of a list is correct for all possible lists of integers.
-We will continue with more blog posts explaining what we can do with formal verification and why it matters. Feel free to share this post and tell us what subjects you would like to see covered!