\maketitle

{\em Coding theory} is a branch of mathematics and computer science that deals with the properties of {\em codes} and their fitness for specific applications. One of the key areas of research in coding theory is the study of bounds on the performance of codes; two of the most important bounds in this area are the {\em sphere-packing bound} and the {\em Delsarte bound}. In this brief article, we take a closer look at these bounds, exploring their key concepts and applications in modern coding theory, in order to show how they can be used to optimize the performance of a code.


\section{Introduction}
Suppose we're trying to build a remote with 16 keys for a DVD player. The most natural option would be to transmit 4-bit sequences, since there are $2^4=16$ possible sequences (or {\em words}) that can be arranged with 4 bits.

However, a problem arises as soon as the situation gets more realistic. If the transmission is not reliable, each transmitted bit has some probability of being received incorrectly; this leads to many wrong inputs and is thus a flaw of the device.

We could try reducing the error rate by tripling each of the four transmitted bits, so instead of transmitting {\em abcd} we would transmit {\em aaabbbcccddd}, but this leads to another problem: we're using 12 bits! This will exhaust the battery of our remote control much faster.
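
As a quick illustration, here is a minimal sketch in Python of the tripling scheme together with its natural majority-vote decoder (the helper names \texttt{encode} and \texttt{decode} are ours, not from any library):

\begin{verbatim}
# Minimal sketch of the bit-tripling scheme with majority-vote decoding.
def encode(bits):
    # "abcd" -> "aaabbbcccddd": repeat each bit three times.
    return [b for b in bits for _ in range(3)]

def decode(received):
    # Take a majority vote over each group of three received bits.
    return [int(sum(received[i:i+3]) >= 2)
            for i in range(0, len(received), 3)]

word = [1, 0, 1, 1]
sent = encode(word)          # 12 bits travel over the channel
sent[4] ^= 1                 # one bit is flipped in transit
assert decode(sent) == word  # the single error is corrected
\end{verbatim}

A single flipped bit per group is corrected, but two flips in the same group still decode incorrectly; the rest of this article makes this trade-off precise.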

\begin{definition}[Hamming distance]
Given two code words $\textbf{w}, \textbf{w}' \in \{0,1\}^n$, the \emph{Hamming distance} between $\textbf{w}$ and $\textbf{w}'$ is the number of bits in which $\textbf{w}$ differs from $\textbf{w}'$:

\begin{equation}
    d_H(\textbf{w}, \textbf{w}') := |\{j \in \{1, 2, \ldots, n\} : w_j \neq w_j'\}|.
\end{equation}
\end{definition}
In other words, the Hamming distance is the number of errors (bit flips) that need to occur for $\textbf{w}$ to become $\textbf{w}'$.
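
In Python, the definition translates directly (a small helper we will reuse below; the name \texttt{hamming} is ours):

\begin{verbatim}
# Hamming distance between two equal-length binary words.
def hamming(w1, w2):
    return sum(a != b for a, b in zip(w1, w2))

assert hamming("101", "010") == 3  # all three bits differ
assert hamming("000", "001") == 1  # only the last bit differs
\end{verbatim}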

\begin{example}
If $n = 3$, we can form $2^3=8$ unique code words, with the following distances from each other:

\begin{table}[!ht]
    \centering
    \begin{tabular}{|c|c|c|c|c|c|c|c|c|}
    \hline
        \textbf{$d_H$} & \textbf{101} & \textbf{111} & \textbf{011} & \textbf{001} & \textbf{000} & \textbf{010} & \textbf{110} & \textbf{100} \\ \hline
        \textbf{101} & 0 & 1 & 2 & 1 & 2 & 3 & 2 & 1 \\ \hline
        \textbf{111} & 1 & 0 & 1 & 2 & 3 & 2 & 1 & 2 \\ \hline
        \textbf{011} & 2 & 1 & 0 & 1 & 2 & 1 & 2 & 3 \\ \hline
        \textbf{001} & 1 & 2 & 1 & 0 & 1 & 2 & 3 & 2 \\ \hline
        \textbf{000} & 2 & 3 & 2 & 1 & 0 & 1 & 2 & 1 \\ \hline
        \textbf{010} & 3 & 2 & 1 & 2 & 1 & 0 & 1 & 2 \\ \hline
        \textbf{110} & 2 & 1 & 2 & 3 & 2 & 1 & 0 & 1 \\ \hline
        \textbf{100} & 1 & 2 & 3 & 2 & 1 & 2 & 1 & 0 \\ \hline
    \end{tabular}
    \caption{Hamming distances between all possible code words with $n=3$}
\end{table}

Another way to visualize the Hamming distance is by placing each possible code word on a vertex of a (hyper)cube; for $n=2$ the cube becomes a square. If every word is placed so that it differs in exactly one bit from each of its neighbors, then the Hamming distance between two words is the number of edges we need to traverse to get from one word to the other.

\begin{figure}
    \centering
    \includegraphics[width=0.25\textwidth]{hamming-cube.png}
\caption{Hamming cube for $n=3$}
\end{figure}

\end{example}

\begin{definition}[weight of a code word]
Suppose $\textbf{w}$ is a code word such that $\textbf{w} \in \{0,1\}^n$. The \emph{weight} of $\textbf{w}$ is the number of 1's in $\textbf{w}$:

\begin{equation}
    |\textbf{w}| := |\{j \in \{1, 2, \ldots, n\} : w_j = 1\}|.
\end{equation}

\end{definition}
\begin{example}
Suppose $n=5$.
\begin{equation}
    |(1, 0, 0, 1, 1)| = 3 \quad\quad |(1, 0, 0, 0, 0)| = 1 \quad\quad |(0, 0, 1, 1, 1)| = 3
\end{equation}
\end{example}

\begin{definition}[bit-wise xor]
Given two words $\textbf{w}, \textbf{w}' \in \{0,1\}^n$, we can define the following operation:
\begin{equation}
    \textbf{w} \oplus \textbf{w}' = ((w_1 + w_1') \bmod 2, (w_2 + w_2') \bmod 2, \ldots, (w_n + w_n') \bmod 2) \in \{0,1\}^n
\end{equation}
\end{definition}
This \emph{sum modulo 2} returns a vector of size $n$ whose $j$-th component has value:
\begin{itemize}
    \item 0 if $w_j = w_j'$;
    \item 1 if $w_j \neq w_j'$.
\end{itemize}

\begin{example}
Suppose $n=6, \textbf{w} = (1, 1, 0, 0, 1, 0), \textbf{w}' = (0, 1, 1, 0, 0, 1)$.
\begin{equation}
\begin{array}{l}
(1, 1, 0, 0, 1, 0) \,+ \\
(0, 1, 1, 0, 0, 1) = \\ \hline
(1, 2, 1, 0, 1, 1)
\end{array}
\quad\rightarrow\quad
\begin{array}{l}
(1, 2, 1, 0, 1, 1) \bmod 2 = \\ \hline
(1, 0, 1, 0, 1, 1) = \textbf{w} \oplus \textbf{w}'
\end{array}.
\end{equation}
\end{example}

At this point, it is clear that the following identity holds:
\begin{equation}
    d_H(\textbf{w}, \textbf{w}') = |\textbf{w} \oplus \textbf{w}'|
\end{equation}
That is, the weight of the bit-wise xor of two code words is exactly the Hamming distance between them.
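
The identity is easy to check numerically; here is a brief sketch (again with our own helper names) that reproduces the example above:

\begin{verbatim}
# Check d_H(w, w') == |w XOR w'| on the example above.
def weight(w):
    return sum(w)                                  # number of 1's

def xor(w1, w2):
    return [(a + b) % 2 for a, b in zip(w1, w2)]   # bit-wise xor

def hamming(w1, w2):
    return sum(a != b for a, b in zip(w1, w2))

w1 = [1, 1, 0, 0, 1, 0]
w2 = [0, 1, 1, 0, 0, 1]
assert xor(w1, w2) == [1, 0, 1, 0, 1, 1]
assert weight(xor(w1, w2)) == hamming(w1, w2) == 4
\end{verbatim}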

\begin{proposition}
    The number of words at Hamming distance exactly $i$ from a given word $\mathbf{w}'$ is ${ n \choose i }$:

    \begin{equation}
        |\{ \mathbf{w} \in \{0,1\}^n : d_H(\mathbf{w},\mathbf{w}') = i\}| = {n \choose i}\quad \forall \mathbf{w}' \in \{0,1\}^n.
    \end{equation}
    Indeed, such a word is determined by choosing which $i$ of the $n$ positions of $\mathbf{w}'$ to flip.
\end{proposition}
\begin{example}
    Suppose we want to count the words at distance $i=2$ from the word $001$ ($n=3$), and likewise the words at distance $i=4$ from $10010$ ($n=5$).

    \begin{equation}
    \begin{aligned}
        \mathbf{w}' = 001 \\
        n = 3 \\
        i = 2
    \end{aligned}
    \quad
    \left.\begin{aligned}
        111\\
        100\\
        011
    \end{aligned}
    \right\} 3
    \quad\quad
    \begin{aligned}
        \mathbf{w}' = 10010 \\
        n = 5 \\
        i = 4
    \end{aligned}
    \quad
    \left.\begin{aligned}
        11101\\
        00101\\
        01001\\
        01111\\
        01100
    \end{aligned}
    \right\} 5
    \end{equation}

    \begin{equation}
        {3 \choose 2} = \frac{3!}{2!(3-2)!}=\frac{3!}{2!}=3
        \quad\quad
        {5 \choose 4} = \frac{5!}{4!(5-4)!}=\frac{5!}{4!}=5
    \end{equation}

\end{example}
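
A brute-force check of the proposition for small $n$ (a sketch using only Python's standard library):

\begin{verbatim}
from itertools import product
from math import comb

# Count the words at Hamming distance exactly i from a fixed word w,
# then compare the count against the binomial coefficient C(n, i).
def count_at_distance(w, i):
    return sum(1 for v in product((0, 1), repeat=len(w))
               if sum(a != b for a, b in zip(v, w)) == i)

assert count_at_distance((0, 0, 1), 2) == comb(3, 2) == 3
assert count_at_distance((1, 0, 0, 1, 0), 4) == comb(5, 4) == 5
\end{verbatim}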

\begin{definition}[distance of a code]
In coding theory, any subset $\mathcal{C} \subseteq \{0,1\}^n$ is called a \emph{code}.

A code $\mathcal{C}$ has \emph{distance} $d$ if:

\begin{equation}
    d_H(\textbf{w}, \textbf{w}') \geq d \;\; \forall \;\textbf{w}, \textbf{w}' \in \mathcal{C}, \; \textbf{w} \neq \textbf{w}'.
\end{equation}

Lastly, for $n, d \geq 0$, let $A(n,d)$ denote the maximum cardinality of a code $\mathcal{C} \subseteq \{0,1\}^n$ with distance $d$.
\end{definition}

\begin{example}
For $n=7$, a valid code of distance $d=3$ is the set:

\begin{equation}
\mathcal{C} =
\left\{
\begin{array}{llll}
0000000, & 0001011, & 0010101, & 0011110, \\
0100110, & 0101101, & 0110011, & 0111000, \\
1000111, & 1001100, & 1010010, & 1011001, \\
1100001, & 1101010, & 1110100, & 1111111
\end{array}
\right\}.
\end{equation}

Notice how, in this code, every two distinct words have a Hamming distance of at least $3$.
\end{example}
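
Checking this claim by hand means comparing $\binom{16}{2} = 120$ pairs, so a brute-force sketch is handy:

\begin{verbatim}
from itertools import combinations

# Distance of a code: the minimum Hamming distance over all
# pairs of distinct codewords.
def code_distance(code):
    return min(sum(a != b for a, b in zip(w1, w2))
               for w1, w2 in combinations(code, 2))

code = ["0000000", "0001011", "0010101", "0011110",
        "0100110", "0101101", "0110011", "0111000",
        "1000111", "1001100", "1010010", "1011001",
        "1100001", "1101010", "1110100", "1111111"]
assert code_distance(code) == 3
\end{verbatim}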

\begin{proposition}
A code $\mathcal{C}$ can correct up to $r$ errors if and only if it has distance $d \geq 2r+1$.
\end{proposition}
\begin{example}
    Suppose $\mathcal{C}$ contains two distinct words $\textbf{w}', \textbf{w}''$ that differ in at most $2r$ bits. For example, with $n=7$ and $r=1$, suppose we receive $\hat{\textbf{w}}$, obtained by flipping $r=1$ bits in one of them:
\begin{equation}
\textbf{w}' = 0000011 \quad \textbf{w}'' =0000101 \quad \hat{\textbf{w}} = 0000001
\end{equation}
At this point, it is impossible to know whether the originally transmitted word was $\textbf{w}'$ or $\textbf{w}''$, since $\hat{\textbf{w}}$ has distance 1 from both. The same experiment can be repeated with any code of distance $d \leq 2r$ by flipping $r$ bits of one code word.

On the other hand, if $\mathcal{C}$ has distance $2r+1$, it can always correct up to $r$ errors:

\begin{equation}
\textbf{w}' = 0000011 \quad \textbf{w}'' =0010101 \quad \hat{\textbf{w}} = 0000001
\end{equation}

In this case, $\hat{\textbf{w}}$ has distance 1 from $\textbf{w}'$ and distance 2 from $\textbf{w}''$, so we can be sure that $\textbf{w}'$ was transmitted.
\end{example}
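
The decoding rule implicit in this example is \emph{nearest-neighbor decoding}: map the received word to the closest code word. A minimal sketch:

\begin{verbatim}
# Nearest-neighbor decoding: return the codeword closest
# (in Hamming distance) to the received word.
def hamming(w1, w2):
    return sum(a != b for a, b in zip(w1, w2))

def decode(received, code):
    return min(code, key=lambda w: hamming(w, received))

code = ["0000011", "0010101"]   # distance 3 = 2*1 + 1, so r = 1
assert decode("0000001", code) == "0000011"   # one flip corrected
\end{verbatim}

If the code has distance $2r+1$ and at most $r$ bits were flipped, the transmitted word is the unique code word within distance $r$ of the received one, so this rule always recovers it.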

In most cases, the number $n$ of bits we can transmit and the number $r$ of errors to be corrected are given; one of the main problems of coding theory (and the main topic of this document) is finding $A(n,d)$, the maximum possible size of a code $\mathcal{C} \subseteq \{0,1\}^n$ with distance $d = 2r+1$.

\section{Simple cases}
\subsection*{$d=1$}
First of all, any code has distance 1: two distinct words always differ in at least one bit, otherwise we could not tell them apart. Hence, for all $n$, we have $A(n,1) = 2^n$, the total number of possible code words of $n$ bits.

\subsection*{$d=2$}
To get $A(n,2)$ we start by considering a particular code.
\begin{lemma}
    The code $\mathcal{C}$ composed of all $n$-bit code words of even weight has distance 2.
\end{lemma}
\subsubsection*{Proof}
We need to prove two things:
\begin{enumerate}
    \item some pair of words $\mathbf{w},\mathbf{w}' \in \mathcal{C}$ satisfies $d_H(\mathbf{w},\mathbf{w}')=2$;
    \item no pair of distinct words $\mathbf{w},\mathbf{w}' \in \mathcal{C}$ satisfies $d_H(\mathbf{w},\mathbf{w}')=1$.
\end{enumerate}

To prove point 1, take any word $\mathbf{w}\in \mathcal{C}$ with $|\mathbf{w}|\geq 2$, and consider the word $\mathbf{w}'$ obtained by replacing two of its 1's with 0's. Then $\mathbf{w}'$ still has even weight, so $\mathbf{w}' \in \mathcal{C}$, and $d_H(\mathbf{w},\mathbf{w}')=2$ since the two words differ exactly in the two replaced positions.

As for point 2, we use a proof by contradiction to show that two words in the code cannot have distance 1.

Let $\mathbf{w},\mathbf{w}' \in \mathcal{C}$ and suppose $d_H(\mathbf{w},\mathbf{w}')=1$, i.e., the two words differ in exactly one bit. Then if $\mathbf{w}$ has even weight, $\mathbf{w}'$ must have odd weight, and vice versa. This contradicts the hypothesis that all words in $\mathcal{C}$ have even weight, so no two words of $\mathcal{C}$ can have Hamming distance 1.

Next, let us compute the cardinality of this code.
\begin{lemma}
    For fixed $n$, the code $\mathcal{C} = \{\mathbf{w} \in \{0,1\}^n : |\mathbf{w}| \text{ is even}\}$ satisfies $|\mathcal{C}| = 2^{n-1}$.
\end{lemma}
\subsubsection*{Proof}
Consider the set $\{0,1\}^{n-1}$ of all $(n-1)$-bit words, which has $2^{n-1}$ elements, and apply the following transformation to each word:
\begin{itemize}
    \item if the word has even weight, append a $0$;
    \item if the word has odd weight, append a $1$.
\end{itemize}

This produces exactly the words of even weight in $\{0,1\}^n$, and every even-weight $n$-bit word comes from exactly one $(n-1)$-bit word (drop its last bit). The transformation is therefore a bijection onto $\mathcal{C}$, so $|\mathcal{C}| = 2^{n-1}$.

Finally, we need a matching upper bound. Given any code $\mathcal{C}' \subseteq \{0,1\}^n$ with distance 2, deleting the last bit of each code word yields $|\mathcal{C}'|$ \emph{distinct} words of $n-1$ bits (two code words agreeing in the first $n-1$ bits would have distance at most 1), so $|\mathcal{C}'| \leq 2^{n-1}$. This proves that, for any value of $n$, $A(n,2) = 2^{n-1}$.
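
The parity-bit construction from the proof is easy to try out (a sketch with our own helper names):

\begin{verbatim}
from itertools import combinations, product

# Build the even-weight code by appending a parity bit to
# every (n-1)-bit word, then verify its size and distance.
def parity_extend(n):
    return [w + (sum(w) % 2,) for w in product((0, 1), repeat=n - 1)]

def code_distance(code):
    return min(sum(a != b for a, b in zip(w1, w2))
               for w1, w2 in combinations(code, 2))

n = 5
code = parity_extend(n)
assert len(code) == 2 ** (n - 1)   # the lemma: |C| = 2^(n-1)
assert code_distance(code) == 2    # the code has distance 2
\end{verbatim}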

\subsection*{$d\geq 3$}
The previous values of $d$ left no room to correct errors: by the proposition above, correcting even a single error requires $d \geq 3$.

Unfortunately, for $d \geq 3$, things start to get complicated.

\section{The sphere-packing bound}

For any $n$ and $d$, we can use a \emph{volume argument} to obtain a simple upper bound on $A(n, d)$.

Imagine a box filled with balls. If you wanted to guess how many balls fit inside it, you could conclude that the number of balls is bounded above by the volume of the container divided by the volume of a single ball.

Let us assume that $d = 2r+1$ is odd and fix any code $\mathcal{C}$ of distance $d$. Now, the set $\{0,1\}^n$ plays the role of the box, containing $|\mathcal{C}|$ Hamming balls. The ball around a word $\mathbf{w} \in \mathcal{C}$ is the set of all words in $\{0,1\}^n$ at distance at most $r$ from it:
\begin{equation}
    B(\mathbf{w}, r) := \{\mathbf{w}' \in \{0,1\}^n : d_H(\mathbf{w}, \mathbf{w}') \leq r\}, \quad \mathbf{w} \in \mathcal{C}.
\end{equation}

So we have $|\mathcal{C}|$ balls of radius $r$ whose centers are at distance at least $2r+1$ from one another. This means the balls are pairwise disjoint: they do not overlap. Therefore the number of balls (words in $\mathcal{C}$) cannot be larger than the total number of words in $\{0,1\}^n$ divided by the number of words in a single ball.

The number of words at Hamming distance exactly $i$ from any word $\mathbf{w}$ is ${n \choose i}$, which implies:

\begin{equation}
    |B(\mathbf{w}, r)| = 1 + {n \choose 1} + {n \choose 2} + \dots + {n \choose r} = \sum_{i=0}^{r} {n \choose i},
\end{equation}
where the $i$-th term of the sum counts the words at distance exactly $i$ from $\mathbf{w}$.

Finally, as we said before, the total number of elements in $\{0,1\}^n$ is $2^n$. So, we get the following upper bound:

\begin{lemma}[sphere-packing bound]
    For all $n$ and $r$,

    \begin{equation}
        A(n, 2r+1) \leq \floor*{\frac{2^n}{\sum\limits_{i=0}^{r}{n \choose i}}}.
    \end{equation}
\end{lemma}

\begin{example}
    For $n=7$ and $d=3$, we have $r=1$, so:
    \begin{equation}
        A(7,3) \leq \floor*{\frac{2^7}{\sum\limits_{i=0}^{1}{7 \choose i}}} = \floor*{\frac{128}{{7 \choose 0} + {7 \choose 1}}} = \floor*{\frac{128}{1 + 7}} = 16.
    \end{equation}
    The 16-word code of distance 3 exhibited earlier attains this bound, so $A(7,3) = 16$.
\end{example}

\begin{example}
    For $n=17$ and $d=3$:
    \begin{equation}
        A(17,3) \leq \floor*{\frac{2^{17}}{\sum\limits_{i=0}^{1}{17 \choose i}}} = \floor*{\frac{131072}{{17 \choose 0} + {17 \choose 1}}} = \floor*{\frac{131072}{1 + 17}} = 7281.
    \end{equation}
\end{example}
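
The bound is a one-liner to evaluate (a sketch using Python's standard library):

\begin{verbatim}
from math import comb

# Sphere-packing (volume) upper bound on A(n, 2r+1).
def sphere_packing_bound(n, r):
    ball = sum(comb(n, i) for i in range(r + 1))  # |B(w, r)|
    return 2 ** n // ball                         # floor division

assert sphere_packing_bound(7, 1) == 16
assert sphere_packing_bound(17, 1) == 7281
\end{verbatim}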

\section{The Delsarte bound}
\begin{theorem}[Delsarte bound]
    For integers $n$, $i$, $t$ with $0 \leq i, t \leq n$, let us put

    \begin{equation}
        K_t(n,i) = \sum_{j=0}^{\min(i,t)}(-1)^j{i \choose j}{n - i \choose t - j}.
    \end{equation}

    Then, for every $n$ and $d$, $A(n,d)$ is bounded above by the optimum value of the following linear program in variables $x_0, x_1, \ldots, x_n$:

    \begin{equation}
    \begin{array}{lll}
        \text{Maximize} & x_0 + x_1 + \ldots + x_n & \\
        \text{subject to} & x_0 = 1 & \\
        & x_i = 0, & i=1,2,\ldots,d-1 \\
        & \sum\limits_{i=0}^{n}K_t(n,i) \cdot x_i \geq 0, & t=1,2,\ldots,n \\
        & x_0, x_1, \ldots, x_n \geq 0 & \\
    \end{array}
    \end{equation}
\end{theorem}
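
As a closing illustration, here is a sketch of how one might evaluate the Delsarte bound numerically. It assumes the SciPy library is available (\texttt{scipy.optimize.linprog}); the function names \texttt{K} and \texttt{delsarte\_bound} are ours. Since \texttt{linprog} minimizes, we maximize $\sum_i x_i$ by minimizing its negation, and we encode the equality constraints through variable bounds.

\begin{verbatim}
from math import comb
from scipy.optimize import linprog

# Krawtchouk polynomial K_t(n, i), exactly as in the theorem.
def K(t, n, i):
    return sum((-1) ** j * comb(i, j) * comb(n - i, t - j)
               for j in range(min(i, t) + 1))

def delsarte_bound(n, d):
    c = [-1.0] * (n + 1)   # minimize -(x_0 + ... + x_n)
    # Constraints sum_i K_t(n,i) x_i >= 0 become -K x <= 0.
    A_ub = [[-K(t, n, i) for i in range(n + 1)] for t in range(1, n + 1)]
    b_ub = [0.0] * n
    # x_0 = 1 and x_i = 0 for i = 1, ..., d-1, via variable bounds.
    bounds = [(1, 1)] + [(0, 0)] * (d - 1) + [(0, None)] * (n - d + 1)
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds)
    return -res.fun

print(delsarte_bound(7, 3))   # approx. 16, matching the bound above
\end{verbatim}

For $n=7$ and $d=3$ the optimum of this linear program is 16, the same value the sphere-packing bound gave, so here too we may conclude $A(7,3) = 16$.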