
\maketitle

{\em Coding theory} is a branch of mathematics and computer science that deals with the properties of {\em codes} and their fitness for specific applications. One of the key areas of research in coding theory is the study of bounds on the performance of codes: two of the most important bounds in this area are the {\em sphere-packing bound} and the {\em Delsarte bound}. In this brief article, we take a closer look at these bounds, exploring their key concepts and applications in modern coding theory, in order to show how they can be used to bound the size, and hence the performance, of a code.


\section{Introduction}
Suppose we're trying to build a remote with 16 keys for a DVD player. The most natural option would be to transmit 4-bit sequences, since there are $2^4=16$ possible sequences (or {\em words}) that can be arranged with 4 bits.

However, a problem arises as soon as the situation becomes more realistic. If the transmission is unreliable, each transmitted bit has some probability of being received incorrectly; this leads to wrong inputs and is thus a flaw of the device.

We could try reducing the error rate by tripling each of the four transmitted bits, so instead of transmitting {\em abcd} we would transmit {\em aaabbbcccddd}, but this leads to another problem: we're using 12 bits! This will exhaust the battery of our remote control much faster.
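To see why tripling helps at all, note that the receiver can decode each triple by majority vote, so a single flipped bit within a triple is corrected. Here is a minimal Python sketch of this scheme (the function names are ours, chosen for illustration):

\begin{verbatim}
def encode(bits):
    """Triple each bit: 'abcd' -> 'aaabbbcccddd'."""
    return "".join(b * 3 for b in bits)

def decode(received):
    """Majority vote within each group of three bits."""
    groups = (received[i:i + 3] for i in range(0, len(received), 3))
    return "".join("1" if g.count("1") >= 2 else "0" for g in groups)

print(encode("1010"))          # -> '111000111000'
print(decode("110000111000"))  # -> '1010' (one flipped bit corrected)
\end{verbatim}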

\begin{definition}[Hamming distance]
Given two code words $\textbf{w}, \textbf{w}' \in \{0,1\}^n$, the \emph{Hamming distance} between $\textbf{w}$ and $\textbf{w}'$ is the number of bits in which $\textbf{w}$ differs from $\textbf{w}'$:

\begin{equation}
    d_H(\textbf{w}, \textbf{w}') := |\{j \in \{1, 2, \ldots, n\} : w_j \neq w_j'\}|.
\end{equation}
\end{definition}
In other words, the Hamming distance is the number of errors (bit flips) that need to occur for $\textbf{w}$ to become $\textbf{w}'$.
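The definition translates directly into code; here is a minimal Python sketch (the function name is ours):

\begin{verbatim}
def hamming_distance(w1, w2):
    """Number of positions in which w1 and w2 differ."""
    assert len(w1) == len(w2)
    return sum(a != b for a, b in zip(w1, w2))

print(hamming_distance((1, 0, 1), (1, 1, 1)))  # -> 1
\end{verbatim}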

\begin{example}
If $n = 3$, we can form $2^3=8$ unique code words, with the following distances from each other:

\begin{table}[!ht]
    \centering
    \begin{tabular}{|c|c|c|c|c|c|c|c|c|}
    \hline
        $d_H$ & \textbf{101} & \textbf{111} & \textbf{011} & \textbf{001} & \textbf{000} & \textbf{010} & \textbf{110} & \textbf{100} \\ \hline
        \textbf{101} & 0 & 1 & 2 & 1 & 2 & 3 & 2 & 1 \\ \hline
        \textbf{111} & 1 & 0 & 1 & 2 & 3 & 2 & 1 & 2 \\ \hline
        \textbf{011} & 2 & 1 & 0 & 1 & 2 & 1 & 2 & 3 \\ \hline
        \textbf{001} & 1 & 2 & 1 & 0 & 1 & 2 & 3 & 2 \\ \hline
        \textbf{000} & 2 & 3 & 2 & 1 & 0 & 1 & 2 & 1 \\ \hline
        \textbf{010} & 3 & 2 & 1 & 2 & 1 & 0 & 1 & 2 \\ \hline
        \textbf{110} & 2 & 1 & 2 & 3 & 2 & 1 & 0 & 1 \\ \hline
        \textbf{100} & 1 & 2 & 3 & 2 & 1 & 2 & 1 & 0 \\ \hline
    \end{tabular}
    \caption{Hamming distances for all possible code words with $n=3$}
\end{table}

Another way to visualize the Hamming distance is by placing each possible code word at a vertex of a (hyper)cube. Of course, for $n=2$ the cube becomes a square. If every word is placed so that it differs in exactly one bit from each of its neighbors, then the Hamming distance between two words is the number of edges we need to traverse in order to get from one word to the other.

\begin{figure}
    \centering
    \includegraphics[width=0.25\textwidth]{hamming-cube.png}
\caption{Hamming cube for $n=3$}
\end{figure}

\end{example}

\begin{definition}[weight of a code word]
Suppose $\textbf{w}$ is a code word such that $\textbf{w} \in \{0,1\}^n$. The \emph{weight} of $\textbf{w}$ is the number of 1's in $\textbf{w}$:

\begin{equation}
    |\textbf{w}| := |\{j \in \{1, 2, \ldots, n\} : w_j = 1\}|.
\end{equation}

\end{definition}
\begin{example}
Suppose $n=5$.
\begin{equation}
    |(1, 0, 0, 1, 1)| = 3 \quad\quad |(1, 0, 0, 0, 0)| = 1 \quad\quad |(0, 0, 1, 1, 1)| = 3
\end{equation}
\end{example}

\begin{definition}[bit-wise xor]
Given two words $\textbf{w}, \textbf{w}' \in \{0,1\}^n$, we can define the following operation:
\begin{equation}
    \textbf{w} \oplus \textbf{w}' = ((w_1 + w_1') \bmod 2, (w_2 + w_2') \bmod 2, \ldots, (w_n + w_n') \bmod 2) \in \{0,1\}^n
\end{equation}
\end{definition}
The \emph{sum modulo 2} operation returns a vector of size $n$ whose $j$-th component has value:
\begin{itemize}
    \item 0 if $w_j = w_j'$;
    \item 1 if $w_j \neq w_j'$.
\end{itemize}

\begin{example}
Suppose $n=6, \textbf{w} = (1, 1, 0, 0, 1, 0), \textbf{w}' = (0, 1, 1, 0, 0, 1)$.
\begin{equation}
\begin{array}{l}
(1, 1, 0, 0, 1, 0) \,+ \\
(0, 1, 1, 0, 0, 1) = \\  \hline
(1, 2, 1, 0, 1, 1)
\end{array}
\quad\rightarrow\quad
\begin{array}{l}
(1, 2, 1, 0, 1, 1) \bmod 2 = \\ \hline
(1, 0, 1, 0, 1, 1) = \textbf{w} \oplus \textbf{w}'
\end{array}.
\end{equation}
\end{example}

At this point, it is clear that the following identity holds:
\begin{equation}
    d_H(\textbf{w}, \textbf{w}') = |\textbf{w} \oplus \textbf{w}'|
\end{equation}
In other words, the weight of the result of the bit-wise xor operation is exactly the Hamming distance between the two code words.
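The identity is easy to verify in code; this small Python sketch (the helper names are ours) reproduces the example above:

\begin{verbatim}
def weight(w):
    """Number of 1s in w."""
    return sum(w)

def xor(w1, w2):
    """Bit-wise xor: component-wise sum modulo 2."""
    return tuple((a + b) % 2 for a, b in zip(w1, w2))

w1 = (1, 1, 0, 0, 1, 0)
w2 = (0, 1, 1, 0, 0, 1)
print(xor(w1, w2))          # -> (1, 0, 1, 0, 1, 1)
print(weight(xor(w1, w2)))  # -> 4, equal to d_H(w1, w2)
\end{verbatim}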

\begin{definition}[distance of a code]
In coding theory, any subset $\mathcal{C} \subseteq \{0,1\}^n$ is called a \emph{code}.

A code $\mathcal{C}$ has \emph{distance} $d$ if:

\begin{equation}
    d_H(\textbf{w}, \textbf{w}') \geq d \;\; \forall \;\textbf{w}, \textbf{w}' \in \mathcal{C}, \;\textbf{w} \neq \textbf{w}'.
\end{equation}

Lastly, for $n, d \geq 0$, let $A(n,d)$ denote the maximum cardinality of a code $\mathcal{C} \subseteq \{0,1\}^n$ with distance $d$.
\end{definition}

\begin{example}
For $n=7$, a valid code of distance $d=3$ is the set:

\begin{equation}
\mathcal{C} =
\left.
\begin{cases}
& 0000000, 0001011, 0010101, 0011110, \;\;\;\\
& 0100110, 0101101, 0110011, 0111000, \\
& 1000111, 1001100, 1010010, 1011001, \\
& 1100001, 1101010, 1110100, 1111111 
\end{cases}\right\}.
\end{equation}

Notice how, in this code, every two distinct words have a Hamming distance of at least $3$.
\end{example}
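This can be checked mechanically; the following brute-force Python sketch (not part of the construction itself) computes the minimum pairwise distance of the code:

\begin{verbatim}
from itertools import combinations

C = ["0000000", "0001011", "0010101", "0011110",
     "0100110", "0101101", "0110011", "0111000",
     "1000111", "1001100", "1010010", "1011001",
     "1100001", "1101010", "1110100", "1111111"]

d = min(sum(a != b for a, b in zip(u, v))
        for u, v in combinations(C, 2))
print(d)  # -> 3
\end{verbatim}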

\begin{proposition}
A code $\mathcal{C}$ can correct up to $r$ errors if and only if it has distance $d \geq 2r+1$.
\end{proposition}
\begin{example}
    Suppose $\mathcal{C}$ contains two distinct words $\textbf{w}', \textbf{w}''$ that differ in at most $2r$ bits. For example, with $n=7$ and $r=1$, suppose we receive $\hat{\textbf{w}}$, obtained by flipping $r=1$ bits in one of them:
\begin{equation}
\textbf{w}' = 0000011 \quad \textbf{w}'' =0000101 \quad \hat{\textbf{w}} = 0000001
\end{equation}
At this point, it's impossible to know whether the originally transmitted word was $\textbf{w}'$ or $\textbf{w}''$. This test can be repeated by choosing any two words at distance $d=2r$ and flipping $r$ bits in one of them.

On the other hand, if $\mathcal{C}$ had a distance of $2r+1$, we could be sure it corrects up to $r$ errors:

\begin{equation}
\textbf{w}' = 0000011 \quad \textbf{w}'' =0010101 \quad \hat{\textbf{w}} = 0000001
\end{equation}

In this case, $\hat{\textbf{w}}$ is at distance $1$ from $\textbf{w}'$ but at distance $2$ from $\textbf{w}''$, so we can be sure that $\textbf{w}'$ was transmitted.
\end{example}
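The decoding rule implicit in this argument is \emph{nearest-neighbour decoding}: output the code word closest to the received word. A minimal Python sketch (the function name is ours):

\begin{verbatim}
def decode(received, code):
    """Return the code word nearest to `received` in Hamming distance."""
    return min(code,
               key=lambda w: sum(a != b for a, b in zip(w, received)))

# with d = 3, the received word 0000001 is closest to 0000011
print(decode("0000001", ["0000011", "0010101"]))  # -> '0000011'
\end{verbatim}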

In most cases, the number $n$ of bits we can transmit and the number $r$ of errors to be corrected are given; one of the main problems of coding theory (and the main topic of this document) is finding $A(n,d)$, the maximum possible size of a code $\mathcal{C} \subseteq \{0,1\}^n$ with distance $d$.

\subsection{Simple cases}
For all $n$, we always have $A(n,1) = 2^n$, because any code has distance 1 by definition (two distinct words differ in at least one bit); of course, this does not allow us to correct any error. Similarly, $A(n,2) = 2^{n-1}$: for instance, the set of all words of even weight attains this size, since two distinct words of even weight must differ in at least two bits. Unfortunately, for $d \geq 3$, things start to get complicated.
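As a quick sanity check of the even-weight construction, here is a small Python sketch for $n=4$:

\begin{verbatim}
from itertools import product, combinations

n = 4
even = [w for w in product((0, 1), repeat=n) if sum(w) % 2 == 0]
print(len(even))  # -> 8 == 2**(n-1)
print(min(sum(a != b for a, b in zip(u, v))
          for u, v in combinations(even, 2)))  # -> 2
\end{verbatim}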

\subsection{The sphere-packing bound}
For any $n$ and $d$, we can use a \emph{volume argument} to obtain a simple upper bound on $A(n, d)$.

Imagine a box filled with balls. If you wanted to guess how many balls can fit inside it, you could conclude that the number of balls is bounded above by the volume of the container divided by the volume of a single ball.

Let's assume that $d = 2r+1$ is odd and fix any code $\mathcal{C}$ of distance $d$. Now, the set $\{0,1\}^n$ represents the box, containing $|\mathcal{C}|$ Hamming balls. Each ball is defined as the set of all words in $\{0,1\}^n$ at distance at most $r$ from a certain word $\textbf{w} \in \mathcal{C}$:
\begin{equation}
    B(\mathbf{w}, r) := \{\mathbf{w}' \in \{0,1\}^n : d_H(\mathbf{w}, \mathbf{w}') \leq r\}, \quad \mathbf{w} \in \mathcal{C}.
\end{equation}

So we have $|\mathcal{C}|$ balls of radius $r$ whose centers are at distance at least $2r+1$ from one another. This means the balls are disjoint: they do not overlap. Therefore, the total number of balls (words in $\mathcal{C}$) cannot be larger than the total number of words in $\{0,1\}^n$ divided by the number of words in a single ball.

The number of words at Hamming distance exactly $i$ from any word $\textbf{w}$ is ${n \choose i}$, which implies:

\begin{equation}
    |B(\mathbf{w}, r)| = 1 + {n \choose 1} + {n \choose 2} + \dots + {n \choose r} = \sum_{i=0}^{r} {n \choose i},
\end{equation}
where the $i$-th term of the sum counts the words at distance exactly $i$ from $\textbf{w}$.

Finally, as we said before, the total number of elements in $\{0,1\}^n$ is $2^n$. So, we get the following upper bound:

\begin{lemma}[sphere-packing bound]
    For all $n$ and $r$,

    \begin{equation}
        A(n, 2r+1) \leq \floor*{\frac{2^n}{\sum\limits_{i=0}^{r}{n \choose i}}}.
    \end{equation}
\end{lemma}

\begin{example}
    For $n=7$ and $d=3$, we have $r=1$, so:
    \begin{equation}
        A(7,3) \leq \floor*{\frac{2^7}{\sum\limits_{i=0}^{1}{7 \choose i}}} = \floor*{\frac{128}{{7 \choose 0} + {7 \choose 1}}} = \floor*{\frac{128}{1 + 7}} = 16.
    \end{equation}
\end{example}

\begin{example}
    For $n=17$ and $d=3$:
    \begin{equation}
        A(17,3) \leq \floor*{\frac{2^{17}}{\sum\limits_{i=0}^{1}{17 \choose i}}} = \floor*{\frac{131072}{{17 \choose 0} + {17 \choose 1}}} = \floor*{\frac{131072}{1 + 17}} = 7281.
    \end{equation}
\end{example}
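Both computations can be reproduced with a short Python sketch (the function name is ours):

\begin{verbatim}
from math import comb

def sphere_packing_bound(n, r):
    """floor(2^n / volume of a Hamming ball of radius r)."""
    return 2**n // sum(comb(n, i) for i in range(r + 1))

print(sphere_packing_bound(7, 1))   # -> 16
print(sphere_packing_bound(17, 1))  # -> 7281
\end{verbatim}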

\section{The Delsarte bound}
\begin{theorem}[Delsarte bound]
    For integers $n$, $i$, $t$ with $0 \leq i, t \leq n$, let us put

    \begin{equation}
        K_t(n,i) = \sum_{j=0}^{\min(i,t)}(-1)^j{i \choose j}{n - i \choose t - j}.
    \end{equation}

    Then, for every $n$ and $d$, $A(n,d)$ is bounded above by the optimum value of the following linear program in variables $x_0, x_1, \ldots, x_n$:

    \begin{equation}
        \begin{array}{lll}
            \text{Maximize}   & x_0 + x_1 + \ldots + x_n                        &                  \\
            \text{subject to} & x_0 = 1                                         &                  \\
                              & x_i = 0,                                        & i=1,2,\ldots,d-1 \\
                              & \sum\limits_{i=0}^{n}K_t(n,i) \cdot x_i \geq 0, & t=1,2,\ldots,n   \\
                              & x_0, x_1, \ldots, x_n \geq 0                    &                  \\
        \end{array}
    \end{equation}
\end{theorem}
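Since the Delsarte bound is the optimum of a linear program, it can be evaluated numerically. The following Python sketch uses \texttt{scipy.optimize.linprog} (assuming NumPy and SciPy are available; the helper names are ours). For $n=7$ and $d=3$ it should return $16$, matching the sphere-packing bound computed earlier.

\begin{verbatim}
from math import comb
import numpy as np
from scipy.optimize import linprog

def krawtchouk(n, t, i):
    """K_t(n, i) as defined in the theorem."""
    return sum((-1)**j * comb(i, j) * comb(n - i, t - j)
               for j in range(min(i, t) + 1))

def delsarte_bound(n, d):
    c = -np.ones(n + 1)  # linprog minimizes, so negate to maximize
    # inequality constraints: -sum_i K_t(n, i) x_i <= 0 for t = 1..n
    A_ub = [[-krawtchouk(n, t, i) for i in range(n + 1)]
            for t in range(1, n + 1)]
    b_ub = np.zeros(n)
    # equality constraints: x_0 = 1 and x_i = 0 for i = 1..d-1
    A_eq = np.zeros((d, n + 1))
    b_eq = np.zeros(d)
    A_eq[0, 0], b_eq[0] = 1.0, 1.0
    for i in range(1, d):
        A_eq[i, i] = 1.0
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                  bounds=(0, None))
    return -res.fun

print(delsarte_bound(7, 3))  # -> 16.0 (up to numerical tolerance)
\end{verbatim}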