% lectures/lecture_1.tex

\lecture{Probabilistic Complexity Classes}{Mon}{29}{Feb}{2016}

\subsection{Review of Complexity Classes}

\begin{definition}[Alphabet, Strings, Languages, Complexity Class]

A finite alphabet is a finite non-empty set $\Sigma$. A finite string over $\Sigma$ is a finite sequence of elements in $\Sigma$ and the set of all finite strings over $\Sigma$ is denoted $\Sigma^*$. Any set of finite strings over a finite alphabet is called a language, i.e., each subset $L$ of $\Sigma^*$ is a language. Any set of languages is called a complexity class.
\end{definition}

An example of a language is the set
\[L = \left\{ (G, s, t) \mid G \text{ is a graph and } s \text{ and } t \text{ are vertices of } G \text{ connected by a path}  \right\}.\]
The algorithms we look into focus on decision problems. These are ``yes, no'' problems defined as follows:
\begin{definition}[Decision Problem]
Given a language $L$, the algorithmic decision problem for $L$ is to find an algorithm $A$ that gets strings as its input and decides whether the given string is in $L$. More specifically, we are looking for an algorithm $A$ such that for each string $x$:
$$A(x) = \left\{\begin{matrix}
1 & x \in L\\ 
0 & x \not \in L 
\end{matrix}\right..$$
When there is no fear of confusion, we do not distinguish between a language and its decision problem.
\end{definition}

We are specifically interested in algorithms that run in polynomial time.

\begin{definition}[Worst-Case Runtime of an Algorithm]
Given an algorithm $A$, its worst-case runtime, $T_A$, is a function of the input length, defined to be the maximum runtime of $A$ over all strings of that length. More formally:
$$T_A(n) = \max_{x \in \Sigma^{n}} T(A, x),$$ where $T(A,x)$ is the execution time of $A$ when given $x$ as its input.
\end{definition}

\begin{definition}[Polytime Algorithm]
An algorithm $A$ is a polytime algorithm if its worst-case runtime $T_A$ is bounded by a polynomial.
\end{definition}

We now review some important complexity classes.
\begin{definition}[$\cc{P}$] \label{pdef} A language $L$ is in the complexity class $\cc{P}$ if there exists a polytime algorithm $A$ that solves its decision problem.
\end{definition}

\begin{definition}[$\cc{NP}$ and $\cc{coNP}$]\label{npdef} A language $L$ is in the complexity class $\cc{NP}$ if there exist a polynomial $p$ and a polytime algorithm $A$ such that:
$$\forall x \in L ~~ \exists y \in \{0, 1\}^{p(\vert x \vert )} ~~ A(x, y) = 1,$$ and 
$$\forall x \not \in L ~~ \forall y \in \{0, 1\}^{p(\vert x \vert )} ~~ A(x, y) = 0.$$

If $A(x, y) = 1$, then $y$ is said to be a witness for $x$. A language $L$ is in $\cc{coNP}$ if and only if its complement, $\Sigma^* \setminus L$, is in $\cc{NP}$.
\end{definition}

\begin{proposition}
$\cc{P} \subseteq \cc{NP}$.
\end{proposition}
\begin{proof}
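If $L \in \cc{P}$, then there exists a polytime algorithm $A$ that solves its decision problem. The same algorithm, ignoring its second input $y$, can be used as the algorithm in Definition~\ref{npdef}: if $x \in L$ then every $y$ is a witness for $x$, and if $x \not \in L$ then no $y$ is a witness. Hence $L$ is in $\cc{NP}$.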
\end{proof}

\subsection{Probabilistic Complexity Classes}
In the probabilistic computation model the algorithms get as their input a string $x$ and a random input $r \in \{0, 1\}^{p(\vert x \vert)}$ for some polynomial $p$, i.e., length of the random input is bounded by a polynomial in terms of $x$'s length. Moreover $r$ is assumed to be chosen uniformly. The algorithm then has to compute an output, or a decision, based on $x$ and $r$.

\begin{definition}[Probabilistic Polytime]
$A$ is a probabilistic polytime algorithm if:
\begin{itemize}
\item Length of the random part, $r$, is bounded by a polynomial in string $x$'s length, and
\item Worst-case runtime of $A$ is bounded by a polynomial.\qedhere
\end{itemize}
\end{definition}

We now define several useful probabilistic complexity classes.

\begin{definition}[$\cc{RP}$ -- Randomized Polynomial] \label{rpdef}
A decision problem $L$ is in $\cc{RP}$ if there exist a polynomial $p$ and a probabilistic polytime algorithm $A$ such that:
$$\forall x \in L ~~~ \Pr[A(x, r) = 1] > \frac{1}{2},$$ and $$\forall x \not \in L ~~~ \Pr[A(x, r) = 1] = 0,$$
where the probabilities are calculated over all $r \in \{0, 1\}^{p(\vert x \vert)}$ uniformly.

This intuitively means that the algorithm never accepts incorrectly: every $x \not \in L$ is always rejected, and every $x \in L$ is accepted with probability more than half.
\end{definition}

\begin{definition}[$\cc{coRP}$] \label{corpdef}
A decision problem $L$ is in $\cc{coRP}$ if there exist a polynomial $p$ and a probabilistic polytime algorithm $A$ such that:
$$\forall x \in L ~~~ \Pr[A(x, r) = 1] = 1,$$ and $$\forall x \not \in L ~~~ \Pr[A(x, r) = 1] < \frac{1}{2},$$
where the probabilities are calculated over all $r \in \{0, 1\}^{p(\vert x \vert)}$ uniformly.

This intuitively means that the algorithm never rejects incorrectly: every $x \in L$ is always accepted, and every $x \not \in L$ is rejected with probability more than half.
\end{definition}

Both $\cc{RP}$ and $\cc{coRP}$ account for one-sided error. We now define a complexity class that allows two-sided error, i.e., error in both acceptance and rejection.

\begin{definition}[$\cc{BPP}$ -- Bounded-error Probabilistic Polynomial] 
A decision problem $L$ is in $\cc{BPP}$ if there exist a polynomial $p$ and a probabilistic polytime algorithm $A$ such that:
$$\forall x \in L ~~~ \Pr[A(x, r) = 1] \geq \frac{2}{3},$$ and $$\forall x \not \in L ~~~ \Pr[A(x, r) = 1] \leq \frac{1}{3},$$
where the probabilities are calculated over all $r \in \{0, 1\}^{p(\vert x \vert)}$ uniformly.
\end{definition}

\textbf{Note:} The $\frac{1}{2}$'s in the definitions of $\cc{RP}$ and $\cc{coRP}$ are arbitrary numbers and one can get the same complexity classes using other constant numbers or even constants raised to the power of a polynomial by simply repeating the algorithms. On the other hand, in the definition of $\cc{BPP}$, the first constant must be bigger than a half and the second one must be less than a half. To see why, consider a coin-tossing algorithm.

\begin{definition}[Extended Decision Algorithms]
An extended decision algorithm $A$ is an algorithm that can return one of the three values $0$, $1$ and $?$, signifying rejection, acceptance and doubt or failure respectively.
\end{definition}

\begin{definition}[$\cc{ZPP}$ -- Zero-error Probabilistic Polynomial]\label{zppdef}
A decision problem $L$ is in $\cc{ZPP}$ if there is a polynomial $p$ and a probabilistic polytime extended decision algorithm $A$ such that:
$$\forall x ~~ \Pr[A(x, r) = ?] \leq \frac{1}{2},$$ and
$$\forall x ~~ \forall r \in \{0, 1\}^{p(\vert x \vert)}  ~~ A(x, r)\neq ? \Rightarrow A(x, r) = \left\{\begin{matrix}
1 & x \in L\\ 
0 & x \not \in L
\end{matrix}\right.,$$
where the probabilities are calculated over all $r \in \{0, 1\}^{p(\vert x \vert)}$ uniformly.

Intuitively, this means that the algorithm fails (is unsure) with probability at most half, and when it does not fail it will always produce the correct answer.
\end{definition}

\begin{proposition}
$\cc{P} \subseteq \cc{RP}.$
\end{proposition}
\begin{proof}
Use the algorithm $A$ from Definition~\ref{pdef} in Definition~\ref{rpdef} and ignore $r$. 
\end{proof}

One can similarly and easily show that $\cc{P} \subseteq \cc{coRP}$ and $\cc{P} \subseteq \cc{ZPP}$.

\begin{proposition}
$\cc{RP} \subseteq \cc{NP}$.
\end{proposition}
\begin{proof}
If $L \in \cc{RP}$, let $A$ be an algorithm for $L$ as in Definition~\ref{rpdef}. Then for each $x \in L$, $\Pr[A(x, r) = 1] > \frac{1}{2}$. Since this probability is positive, there exists an $r$ such that $A(x, r) = 1$. This $r$ can be used as a witness for $x$. Similarly, if $x \not \in L$, no witness can be found since $\Pr[A(x, r) = 1] = 0$.
\end{proof}

\textbf{Note:} We do not know whether $\cc{P}=\cc{RP}$ or whether $\cc{RP}=\cc{NP}$. 

\begin{proposition}
$\cc{RP} \subseteq \cc{BPP}.$
\end{proposition}
\begin{proof}
Let $L \in \cc{RP}$. Take an algorithm $A$ for it as in Definition~\ref{rpdef} and construct the algorithm $A^{(k)}$ which takes a string $x$ as input and works as follows:
\begin{tabbing}
\hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \=\kill
\> Choose random strings $r_1, r_2, \ldots, r_k$\\
\>{\bf if} $\exists i ~~ A(x, r_i) = 1$ {\bf then } \\
\>\> {\bf return} 1 \\
\>{\bf else} \\
\>\> {\bf return} 0\\
\>{\bf endif} 
\end{tabbing}

If $x \in L$, then $A(x, r)$ returns 1 with probability more than $\frac{1}{2}$, so $A^{(k)}$ returns 1 with probability more than $1 - \frac{1}{2^k}$. It is sufficient to choose $k$ such that this value gets bigger than $\frac{2}{3}$. On the other hand, if $x \not \in L$, then $A(x, r)$ always returns 0 and so does $A^{(k)}$.
\end{proof}

\textbf{Note:} The relationship between $\cc{BPP}$ and $\cc{NP}$ is an open problem.

\textbf{Homework:} Prove that if $\cc{NP} \subseteq \cc{BPP}$, then $\cc{NP} = \cc{RP}$.

$$\begin{tikzpicture}
  \tikzset{main node/.style={circle,fill=blue!20,draw,minimum size=1cm,inner sep=0pt}}
    \node[main node] (1) {$\cc{NP}$};
    \node[main node] (2) [right =1.5cm of 1]  {$\cc{BPP}$};
    \node[main node] (3) [right =1.5cm of 2]  {$\cc{coNP}$};
    \node[main node] (4) [below = 1.5cm of 1] {$\cc{RP}$};
    \node[main node] (5) [below = 1.5cm of 3] {$\cc{coRP}$};
    \node[main node] (6) [below = 3cm of 2] {$\cc{P}$};

    \path[draw,thick]
    (6) edge node {} (4)
    (6) edge node {} (5)
    (4) edge node {} (1)
    (4) edge node {} (2)
    (5) edge node {} (2)
    (5) edge node {} (3)
    ;
    %%
\end{tikzpicture}$$
The figure above shows relations between various complexity classes. An edge between two classes means that the upper class is a superset of the lower one.

\begin{definition}[Probabilistic Average Runtime]
Given a probabilistic algorithm $A$, its average runtime is a function of the length, $n$, of input $x$ defined as:
$$\max_{x \in \Sigma^n} E[T(A, x, r)],$$
where $T(A, x, r)$ is the runtime of $A$ with inputs $x$ and $r$ and the expectation is defined uniformly over all possible $r$. Intuitively, for each input $x$, we take the average time the algorithm requires to terminate on $x$ among all random $r$'s and then we take the maximum over all strings $x$ of the fixed length $n$.  
\end{definition}
\begin{definition}[$\cc{ACP}$ -- Average Case Polynomial]\label{acpdef}
A decision problem $L$ is in $\cc{ACP}$ if there is an algorithm $A$ with polynomial average runtime that always produces the right answer for $L$.
\end{definition}

\begin{proposition}
$\cc{ZPP} = \cc{ACP}.$
\end{proposition}
\begin{proof}
We first prove that $\cc{ZPP} \subseteq \cc{ACP}$. Let $L \in \cc{ZPP}$ and $A$ be an algorithm as in Definition~\ref{zppdef}. We provide the following algorithm $A'$:
\begin{tabbing}
\hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \=\kill
\> 1: Choose a random string $r$\\
\>{\bf if} $A(x, r) \not = ?$ {\bf then } \\
\>\> {\bf return} $A(x, r)$ \\
\>{\bf else} \\
\>\> {\bf goto} 1\\
\>{\bf endif} 
\end{tabbing}

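This algorithm will always return the correct answer upon termination. Assuming that $A(x, r)$ terminates in time at most $t$, and noting that the $(i+1)$-st iteration is reached with probability at most $\frac{1}{2^i}$, $A'$, when run on $x$, has an average runtime of at most $$t + \frac{1}{2} t + \frac{1}{4} t + \cdots = t \sum_{i=0}^\infty \frac{1}{2^i} = 2t,$$ which is bounded by a polynomial in $\vert x \vert$ because $t$ is.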

Now we prove that $\cc{ACP} \subseteq \cc{ZPP}$. Let $A$ be an algorithm as in Definition~\ref{acpdef}. We create the following algorithm $A'$:

\begin{tabbing}
\hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \=\kill
\> Run the first $2 t(x)$ steps of $A(x)$, where $t(x)$ is the average runtime of $A(x)$. \\
\>{\bf if} an answer was returned (a decision was made) {\bf then } \\
\>\> {\bf return} the same answer (decision) \\
\>{\bf else} \\
\>\> {\bf return} ?\\
\>{\bf endif} 
\end{tabbing}

When the returned value is not $?$, $A'$ returns only correct answers because $A$ has the same property. It only remains to show that, given a string $x$, the probability that $A'$ returns $?$ is at most $\frac{1}{2}$. Suppose otherwise; then with probability more than $\frac{1}{2}$ the algorithm $A$ runs for more than $2 t(x)$ steps on $x$, so its average runtime on $x$ is more than $\frac{1}{2} \cdot 2 t(x) = t(x)$, which is a contradiction.
\end{proof}


\begin{proposition} $\cc{ZPP} = \cc{RP} \cap \cc{coRP}$.\footnote{Was actually covered in Lecture 2}
\end{proposition}
\begin{proof}
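We first show that $\cc{ZPP} \subseteq \cc{RP}$. A similar reasoning shows that $\cc{ZPP} \subseteq \cc{coRP}$. Take the algorithm $A$ as in Definition~\ref{zppdef}. We construct the following algorithm, which never accepts an $x \not \in L$ and accepts each $x \in L$ with probability at least $\frac{1}{2}$; since the constant $\frac{1}{2}$ in Definition~\ref{rpdef} is arbitrary and can be improved by repetition, this shows that $L \in \cc{RP}$: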

\begin{tabbing}
\hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \=\kill
\>{\bf if} $A(x, r) \neq ?$ {\bf then } \\
\>\> {\bf return} $A(x, r)$ \\
\>{\bf else} \\
\>\> {\bf return} $0$\\
\>{\bf endif} .
\end{tabbing}

Now we prove the other side. Let $L \in \cc{RP} \cap \cc{coRP}$ and let $A_1$ and $A_2$ be decision algorithms for $L$ according to Definitions \ref{rpdef} and \ref{corpdef}. The following algorithm $A'$ shows that $L \in \cc{ZPP}$ according to Definition \ref{zppdef}:

\begin{tabbing}
\hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \= \hspace*{.25in} \=\kill
\>{\bf if} $A_1(x, r) = A_2(x, r)$ {\bf then } \\
\>\> {\bf return} $A_1(x, r)$ \\
\>{\bf else} \\
\>\> {\bf return} $?$\\
\>{\bf endif} .
\end{tabbing}

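If $A'$ returns something other than $?$, then its answer is correct: $A_1$ never accepts an $x \not \in L$, so an answer of $1$ from $A_1$ is always correct, and $A_2$ never rejects an $x \in L$, so an answer of $0$ from $A_2$ is always correct. Hence if the two algorithms agree, then the decision must be correct.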

We should now show that $\Pr[A'(x, r) = ?] \leq \frac{1}{2}$. If $x \in L$ then $A_2(x, r) = 1$ with probability $1$ and $\Pr[A_1(x, r) = 1] \geq \frac{1}{2}$, so $\Pr[A'(x, r) = 1] \geq \frac{1}{2}$ and $\Pr[A'(x, r) = ?] \leq \frac{1}{2}$. A similar argument settles the case when $x \not \in L$. 
\end{proof}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: "../main"
%%% End: