\maketitle

{\em Coding theory} is a branch of mathematics and computer science that deals with the properties of {\em codes} and their fitness for specific applications. A key area of research in coding theory is the study of bounds on the performance of codes; two of the most important are the {\em sphere-packing bound} and the {\em Delsarte bound}. In this brief article, we take a closer look at these two bounds, exploring their key concepts and applications in modern coding theory, in order to show how they can be used to bound the maximum size of a code.


\section{Introduction}
Suppose we're trying to build a remote with 16 keys for a DVD player. The most natural option would be to transmit 4-bit sequences, since there are $2^4=16$ possible sequences (or {\em words}) that can be arranged with 4 bits.

However, a problem arises as soon as the situation gets more realistic. If the transmission is not reliable, each bit has some probability of being received incorrectly; this will lead to a lot of wrong inputs and is thus a flaw of the device.

We could try reducing the error rate by tripling each of the four transmitted bits, so that instead of transmitting {\em abcd} we would transmit {\em aaabbbcccddd}. But this leads to another problem: we're now using 12 bits, which will exhaust the battery of our remote control much faster.
13
14\begin{definition}[Hamming distance]
15Given two code words $\textbf{w}, \textbf{w}' \in \{0,1\}^n$, the \emph{Hamming distance} between $\textbf{w}$ and $\textbf{w}'$ is the number of bits in which $\textbf{w}$ differs from $\textbf{w}'$:
16
17\begin{equation}
18 d_H(\textbf{w}, \textbf{w}') := |\{j \in \{1, 2, \ldots, n\} : w_j \neq w_j'\}|.
19\end{equation}
20\end{definition}
21In other words, the Hamming distance is the number of errors (bit flips) that need to occur for \textbf{w} to become \textbf{w'}.
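
To make the definition concrete, here is a minimal Python sketch (the helper name \texttt{hamming\_distance} is our own, not taken from any library) that counts the positions in which two equal-length words differ:

\begin{verbatim}
def hamming_distance(w, wp):
    """Number of positions in which the words w and wp differ."""
    assert len(w) == len(wp)
    return sum(1 for a, b in zip(w, wp) if a != b)

# 101 and 010 differ in every position:
print(hamming_distance((1, 0, 1), (0, 1, 0)))  # -> 3
\end{verbatim}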

\begin{example}
If $n = 3$, we can form $2^3=8$ unique code words, with the following distances from each other:

\begin{table}[!ht]
    \centering
    \begin{tabular}{|c|c|c|c|c|c|c|c|c|}
    \hline
        $d_H$ & \textbf{101} & \textbf{111} & \textbf{011} & \textbf{001} & \textbf{000} & \textbf{010} & \textbf{110} & \textbf{100} \\ \hline
        \textbf{101} & 0 & 1 & 2 & 1 & 2 & 3 & 2 & 1 \\ \hline
        \textbf{111} & 1 & 0 & 1 & 2 & 3 & 2 & 1 & 2 \\ \hline
        \textbf{011} & 2 & 1 & 0 & 1 & 2 & 1 & 2 & 3 \\ \hline
        \textbf{001} & 1 & 2 & 1 & 0 & 1 & 2 & 3 & 2 \\ \hline
        \textbf{000} & 2 & 3 & 2 & 1 & 0 & 1 & 2 & 1 \\ \hline
        \textbf{010} & 3 & 2 & 1 & 2 & 1 & 0 & 1 & 2 \\ \hline
        \textbf{110} & 2 & 1 & 2 & 3 & 2 & 1 & 0 & 1 \\ \hline
        \textbf{100} & 1 & 2 & 3 & 2 & 1 & 2 & 1 & 0 \\ \hline
    \end{tabular}
    \caption{Hamming distances between all possible code words for $n=3$}
\end{table}

Another way to visualize the Hamming distance is to place each possible code word on a vertex of a (hyper)cube; for $n=2$ the cube becomes a square. If every word is placed so that it differs in exactly one bit from each of its neighbors, then the Hamming distance between two words is the number of edges we need to traverse in order to get from one word to the other.

\begin{figure}
    \centering
    \includegraphics[width=0.25\textwidth]{hamming-cube.png}
\caption{Hamming cube for $n=3$}
\end{figure}

\end{example}

\begin{definition}[weight of a code word]
Suppose $\textbf{w}$ is a code word such that $\textbf{w} \in \{0,1\}^n$. The \emph{weight} of $\textbf{w}$ is the number of 1's in $\textbf{w}$:

\begin{equation}
    |\textbf{w}| := |\{j \in \{1, 2, \ldots, n\} : w_j = 1\}|.
\end{equation}

\end{definition}
\begin{example}
Suppose $n=5$.
\begin{equation*}
    |(1, 0, 0, 1, 1)| = 3 \quad\quad |(1, 0, 0, 0, 0)| = 1 \quad\quad |(0, 0, 1, 1, 1)| = 3
\end{equation*}
\end{example}

\begin{definition}[sum modulo 2 (bit-wise xor)]
Given two words $\textbf{w}, \textbf{w}' \in \{0,1\}^n$, we can define the following operation:
\begin{equation}
    \textbf{w} \oplus \textbf{w}' = ((w_1 + w_1') \bmod 2, (w_2 + w_2') \bmod 2, \ldots, (w_n + w_n') \bmod 2) \in \{0,1\}^n
\end{equation}
\end{definition}
The \emph{sum modulo 2} operation returns a vector $\mathbf{z} \in \{0,1\}^n$ where each component $z_j$ has value:
\begin{itemize}
    \item 0 if $w_j = w_j'$;
    \item 1 if $w_j \neq w_j'$.
\end{itemize}

\begin{example}
Suppose $n=6, \textbf{w} = (1, 1, 0, 0, 1, 0), \textbf{w}' = (0, 1, 1, 0, 0, 1)$.
\begin{equation*}
\begin{array}{l}
(1, 1, 0, 0, 1, 0) \,+ \\
(0, 1, 1, 0, 0, 1) = \\ \hline
(1, 2, 1, 0, 1, 1)
\end{array}
\quad\rightarrow\quad
\begin{array}{l}
(1, 2, 1, 0, 1, 1) \bmod 2 = \\ \hline
(1, 0, 1, 0, 1, 1) = \textbf{w} \oplus \textbf{w}'
\end{array}.
\end{equation*}
\end{example}

At this point, it's clear that the following identity holds:
\begin{equation}
    d_H(\textbf{w}, \textbf{w}') = |\textbf{w} \oplus \textbf{w}'|
\end{equation}
In other words: the weight of the bit-wise xor of two code words is the Hamming distance between them.
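
As a quick sanity check of this identity, here is a hedged Python sketch (reusing the \texttt{hamming\_distance} helper from the sketch above; \texttt{weight} and \texttt{xor} are again our own names):

\begin{verbatim}
def weight(w):
    """Number of 1's in the word w."""
    return sum(w)

def xor(w, wp):
    """Component-wise sum modulo 2 of two words."""
    return tuple((a + b) % 2 for a, b in zip(w, wp))

w, wp = (1, 1, 0, 0, 1, 0), (0, 1, 1, 0, 0, 1)
# |w XOR w'| and d_H(w, w') agree:
print(weight(xor(w, wp)), hamming_distance(w, wp))  # -> 4 4
\end{verbatim}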

\begin{proposition}
    The number of words at Hamming distance exactly $i$ from a given word $\mathbf{w}'$ is ${n \choose i}$:

    \begin{equation}
        |\{ \mathbf{w} \in \{0,1\}^n : d_H(\mathbf{w},\mathbf{w}') = i\}| = {n \choose i} \quad \forall\; \mathbf{w}' \in \{0,1\}^n.
    \end{equation}
\end{proposition}
\begin{example}
    Suppose we want to count the words at distance $i=2$ from the word $001$ ($n=3$) and, similarly, the words at distance $i=4$ from the word $10010$ ($n=5$).

    \begin{equation*}
    \begin{aligned}
        \mathbf{w} = 001 \\
        n = 3 \\
        i = 2
    \end{aligned}
    \quad
    \left.\begin{aligned}
        111\\
        100\\
        010
    \end{aligned}
    \right\} 3
    \quad\quad
    \begin{aligned}
        \mathbf{w} = 10010 \\
        n = 5 \\
        i = 4
    \end{aligned}
    \quad
    \left.\begin{aligned}
        01100\\
        11101\\
        00101\\
        01001\\
        01111
    \end{aligned}
    \right\} 5
    \end{equation*}

    \begin{equation*}
        {3 \choose 2} = \frac{3!}{2!(3-2)!}=\frac{3!}{2!}=3
        \quad\quad
        {5 \choose 4} = \frac{5!}{4!(5-4)!}=\frac{5!}{4!}=5
    \end{equation*}

\end{example}
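
The proposition can also be checked by brute force; a small Python sketch of this (assuming the \texttt{hamming\_distance} helper defined earlier):

\begin{verbatim}
from itertools import product
from math import comb

def words_at_distance(wp, i):
    """All words in {0,1}^n at Hamming distance exactly i from wp."""
    n = len(wp)
    return [w for w in product((0, 1), repeat=n)
            if hamming_distance(w, wp) == i]

# |{w : d_H(w, 001) = 2}| should equal C(3,2) = 3.
print(len(words_at_distance((0, 0, 1), 2)), comb(3, 2))  # -> 3 3
\end{verbatim}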

\begin{definition}[distance of a code]
In coding theory, any subset $\mathcal{C} \subseteq \{0,1\}^n$ is called a \emph{code}.

A code $\mathcal{C}$ has \emph{distance} $d$ if:

\begin{equation}
    d_H(\textbf{w}, \textbf{w}') \geq d \quad \forall \;\textbf{w}, \textbf{w}' \in \mathcal{C},\ \textbf{w} \neq \textbf{w}'.
\end{equation}

Lastly, for $n, d \geq 0$, let $A(n,d)$ denote the maximum cardinality of a code $\mathcal{C} \subseteq \{0,1\}^n$ with distance $d$.
\end{definition}

\begin{example}
For $n=7$, a valid code of distance $d=3$ is the set:

\begin{equation*}
\label{sample-code}
\mathcal{C} =
\left.
\begin{cases}
    & 0000000, 0001011, 0010101, 0011110, \\
    & 0100110, 0101101, 0110011, 0111000, \\
    & 1000111, 1001100, 1010010, 1011001, \\
    & 1100001, 1101010, 1110100, 1111111
\end{cases}\right\}.
\end{equation*}

Notice how, in this code, every two distinct words have a Hamming distance of at least $3$.
\end{example}
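
We can verify the claimed distance by brute force over all pairs; a minimal sketch (again reusing \texttt{hamming\_distance}, which works equally well on strings):

\begin{verbatim}
from itertools import combinations

C = ["0000000", "0001011", "0010101", "0011110",
     "0100110", "0101101", "0110011", "0111000",
     "1000111", "1001100", "1010010", "1011001",
     "1100001", "1101010", "1110100", "1111111"]

# Minimum Hamming distance over all pairs of distinct code words:
print(min(hamming_distance(w, wp) for w, wp in combinations(C, 2)))  # -> 3
\end{verbatim}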

\begin{proposition}
A code $\mathcal{C}$ can correct up to $r$ errors if and only if it has distance $d \geq 2r+1$.
\end{proposition}
\begin{example}
    Suppose $\mathcal{C}$ contains two distinct words $\textbf{w}', \textbf{w}''$ that differ in at most $2r$ bits. For example, with $n=7$ and $r=1$, suppose we receive $\hat{\textbf{w}}$, obtained by flipping $r=1$ bits in one of them:
\begin{equation*}
\textbf{w}' = 0000011 \quad \textbf{w}'' =0000101 \quad \hat{\textbf{w}} = 0000001
\end{equation*}
At this point, it's impossible to know whether the originally transmitted word was $\textbf{w}'$ or $\textbf{w}''$, since $\hat{\textbf{w}}$ is at distance $r=1$ from both. The same test can be repeated with any two code words at distance $2r$, flipping $r$ of the bits in which they differ.

On the other hand, if $\mathcal{C}$ had distance $2r+1$, we could be sure it corrects up to $r$ errors:

\begin{equation*}
\textbf{w}' = 0000011 \quad \textbf{w}'' =0010101 \quad \hat{\textbf{w}} = 0000001
\end{equation*}

In this case, $\hat{\textbf{w}}$ is at distance 1 from $\textbf{w}'$ and at distance 2 from $\textbf{w}''$, so we can be sure that $\textbf{w}'$ was transmitted.
\end{example}
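
Error correction itself is just nearest-neighbor decoding; a minimal sketch (the function name \texttt{decode} is our own):

\begin{verbatim}
def decode(w_hat, C):
    """Return the code word of C closest to the received word w_hat."""
    return min(C, key=lambda w: hamming_distance(w, w_hat))

# The second scenario above: w_hat is at distance 1 from w' and 2 from w''.
print(decode("0000001", ["0000011", "0010101"]))  # -> 0000011
\end{verbatim}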

In most cases, the number $n$ of bits we can transmit and the number $r$ of errors to be corrected are given; one of the main problems of coding theory (and the main topic of this document) is finding $A(n,d)$, the maximum possible size of a code $\mathcal{C} \subseteq \{0,1\}^n$ with distance $d$.

\section{Simple cases}
\subsection*{$d=1$}
First of all, any code has distance 1 by definition: distinct code words must differ in at least one bit, otherwise we would not be able to tell them apart. Hence, for all $n$, we have $A(n,1) = 2^n$, the total number of possible code words of $n$ bits.

\subsection*{$d=2$}
To get $A(n,2)$ we start by considering a particular code.
\begin{lemma}
    The code $\mathcal{C}$, composed of all $n$-bit code words of even weight, has distance 2.
\end{lemma}
\subsubsection*{Proof}
We need to prove two things:
\begin{enumerate}
    \item $\exists\; \mathbf{w},\mathbf{w}' \in \mathcal{C} : d_H(\mathbf{w},\mathbf{w}')=2$;
    \item $\nexists\; \mathbf{w},\mathbf{w}' \in \mathcal{C} : d_H(\mathbf{w},\mathbf{w}')=1$.
\end{enumerate}

To prove point 1, we can just take any word $\mathbf{w}\in \mathcal{C}$ with $|\mathbf{w}|\geq 2$ and consider the word $\mathbf{w}'$ obtained by replacing two of its 1's with two 0's.
$\mathbf{w}'$ has even weight, so we have $\mathbf{w}' \in \mathcal{C}$, and clearly $d_H(\mathbf{w},\mathbf{w}')=2$.

As for point 2, we use a proof by contradiction to show that it's impossible for two words in the code to have distance 1.

Let $\mathbf{w},\mathbf{w}' \in \mathcal{C}$ and suppose $d_H(\mathbf{w},\mathbf{w}')=1$, i.e., $\mathbf{w}$ and $\mathbf{w}'$ differ in exactly one bit.
This means that if $\mathbf{w}$ has even weight, then $\mathbf{w}'$ must have odd weight, and vice versa. This contradicts the hypothesis that all words in $\mathcal{C}$ have even weight, so $\mathbf{w}$ and $\mathbf{w}'$ cannot have Hamming distance 1.

Next, let's compute the size of this code.
\begin{lemma}
    For fixed $n$, the code $\mathcal{C} = \{\mathbf{w}\in\{0,1\}^n : |\mathbf{w}| \text{ is even}\}$ has cardinality $|\mathcal{C}| = 2^{n-1}$.
\end{lemma}
\subsubsection*{Proof}
Start from the set of all $2^{n-1}$ words of $n-1$ bits and apply the following transformation to each word:
\begin{itemize}
    \item if the word has an even weight, append a $0$;
    \item if the word has an odd weight, append a $1$.
\end{itemize}

The result is exactly the code $\mathcal{C}$ of all $n$-bit code words of even weight, and the transformation is a bijection, since the first $n-1$ bits of a word determine the appended bit. Therefore $|\mathcal{C}| = 2^{n-1}$.

Conversely, no code of distance 2 can be larger: deleting the last bit of each word of such a code produces $(n-1)$-bit words that are still pairwise distinct (two words differing in at least two positions still differ in at least one after the deletion), so a code of distance 2 has at most $2^{n-1}$ words. This is proof that, for any value of $n$, $A(n,2) = 2^{n-1}$.
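
The parity-bit construction from the proof translates directly into code; a sketch (assuming the helpers defined earlier):

\begin{verbatim}
from itertools import combinations, product

def even_weight_code(n):
    """All n-bit words of even weight, built by appending a parity bit."""
    return [w + (sum(w) % 2,) for w in product((0, 1), repeat=n - 1)]

C = even_weight_code(4)
print(len(C))  # -> 8 = 2^(4-1)
# Its minimum distance is indeed 2:
print(min(hamming_distance(w, wp) for w, wp in combinations(C, 2)))  # -> 2
\end{verbatim}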

\subsection*{$d\geq 3$}
The previous values of $d$ didn't leave any room for correcting errors.

Unfortunately, for $d \geq 3$, things start to get complicated.

\subsection{The sphere-packing bound}

For any $n$ and $d$, we can use a \emph{volume argument} to obtain a simple upper bound on $A(n, d)$.

Imagine a box filled with balls. If you wanted to guess how many balls can fit inside it, you could conclude that the number of balls is bounded above by the volume of the container divided by the volume of a single ball.

Let's assume that $d = 2r+1$ is odd and fix any code $\mathcal{C}$ of distance $d$. Now, the set $\{0,1\}^n$ represents the box, containing $|\mathcal{C}|$ Hamming balls. Each ball is defined as the set of all words in $\{0,1\}^n$ at distance at most $r$ from a certain word $\mathbf{w} \in \mathcal{C}$:
\begin{equation}
    B(\mathbf{w}, r) := \{\mathbf{w}' \in \{0,1\}^n : d_H(\mathbf{w}, \mathbf{w}') \leq r\}, \quad \mathbf{w} \in \mathcal{C}.
\end{equation}

\begin{figure}[ht!]
    \centering
    \fbox{\includegraphics[width=\textwidth]{sphere-packing.png}}
\caption{A visualization of the sphere-packing bound.}
    \label{sphere-vis}
\end{figure}

So we have $|\mathcal{C}|$ balls of radius $r$ whose centers are at distance at least $2r+1$ from one another. By the triangle inequality, the balls are pairwise disjoint: a word within distance $r$ of two centers would force those centers to be within distance $2r$ of each other. Therefore, the total number of balls (words in $\mathcal{C}$) cannot be larger than the total number of words in $\{0,1\}^n$ divided by the number of words in a single ball.

The number of words at Hamming distance exactly $i$ from any word $\mathbf{w}$ is ${n \choose i}$, which implies:

\begin{equation*}
    |B(\mathbf{w}, r)| = 1 + {n \choose 1} + {n \choose 2} + \dots + {n \choose r} = \sum_{i=0}^{r} {n \choose i},
\end{equation*}
where the $i$-th term of the sum counts the words at distance exactly $i$ from $\mathbf{w}$.

Finally, as we said before, the total number of elements in $\{0,1\}^n$ is $2^n$. So, we get the following upper bound:

\begin{lemma}[sphere-packing bound]
    For all $n$ and $r$,

    \begin{equation}
        A(n, 2r+1) \leq \floor*{\frac{2^n}{\sum\limits_{i=0}^{r}{n \choose i}}}.
    \end{equation}
\end{lemma}

\begin{example}
    For $n=7$ and $d=3$, we have $r=1$, so:
    \begin{equation*}
        A(7,3) \leq \floor*{\frac{2^7}{\sum\limits_{i=0}^{1}{7 \choose i}}} = \floor*{\frac{128}{{7 \choose 0} + {7 \choose 1}}} = \floor*{\frac{128}{1 + 7}} = 16.
    \end{equation*}
    Notice that the 16-word code of distance 3 we saw earlier attains this bound, so in fact $A(7,3)=16$.
\end{example}

\begin{example}
    For $n=17$ and $d=3$:
    \begin{equation*}
        A(17,3) \leq \floor*{\frac{2^{17}}{\sum\limits_{i=0}^{1}{17 \choose i}}} = \floor*{\frac{131072}{{17 \choose 0} + {17 \choose 1}}} = \floor*{\frac{131072}{1 + 17}} = 7281.
    \end{equation*}
\end{example}

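The bound is a one-liner in Python; a sketch (floor division matches the floor in the lemma):

\begin{verbatim}
from math import comb

def sphere_packing_bound(n, r):
    """Upper bound on A(n, 2r+1): 2^n over the volume of a radius-r ball."""
    return 2**n // sum(comb(n, i) for i in range(r + 1))

print(sphere_packing_bound(7, 1))   # -> 16
print(sphere_packing_bound(17, 1))  # -> 7281
\end{verbatim}
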
\section{The Delsarte bound}
\begin{theorem}[Delsarte bound]
    For integers $n$, $i$, $t$ with $0 \leq i, t \leq n$, let us put

    \begin{equation}
        K_t(n,i) = \sum_{j=0}^{\min(i,t)}(-1)^j{i \choose j}{n - i \choose t - j},
    \end{equation}
    where $K_t$ is the \emph{Krawtchouk polynomial} of degree $t$.

    Then, for every $n$ and $d$, $A(n,d)$ is bounded above by the optimal value of the following linear program in the variables $x_0, x_1, \ldots, x_n$:

    \begin{equation}
    \begin{array}{lll}
        \text{Maximize} & x_0 + x_1 + \ldots + x_n & \\
        \text{subject to} & x_0 = 1 & \\
        & x_i = 0, & i=1,2,\ldots,d-1 \\
        & \sum\limits_{i=0}^{n}K_t(n,i) \cdot x_i \geq 0, & t=1,2,\ldots,n \\
        & x_0, x_1, \ldots, x_n \geq 0 & \\
    \end{array}
    \end{equation}
\end{theorem}
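
Delsarte's linear program is small enough to solve directly; here is a sketch using \texttt{scipy.optimize.linprog} (an assumption: SciPy is available; \texttt{linprog} minimizes, so we negate the objective to maximize):

\begin{verbatim}
from math import comb
from scipy.optimize import linprog

def krawtchouk(t, n, i):
    """Krawtchouk polynomial K_t(n, i)."""
    return sum((-1)**j * comb(i, j) * comb(n - i, t - j)
               for j in range(min(i, t) + 1))

def delsarte_bound(n, d):
    """Optimal value of Delsarte's LP: an upper bound on A(n, d)."""
    c = [-1.0] * (n + 1)                     # maximize x_0 + ... + x_n
    A_ub = [[-krawtchouk(t, n, i) for i in range(n + 1)]
            for t in range(1, n + 1)]        # sum_i K_t(n,i) x_i >= 0
    b_ub = [0.0] * n
    A_eq, b_eq = [[1.0] + [0.0] * n], [1.0]  # x_0 = 1
    bounds = [(0, 0) if 1 <= i <= d - 1 else (0, None)
              for i in range(n + 1)]         # x_i = 0 for i = 1..d-1
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
                  bounds=bounds)
    return -res.fun

print(delsarte_bound(7, 3))  # -> 16.0, here equal to the sphere-packing bound
\end{verbatim}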

\subsection{Making sense of these constraints}
To each code $\mathcal{C} \subseteq \{0,1\}^n$ we associate a vector $\mathbf{\tilde{x}}=(\tilde{x}_0, \tilde{x}_1, \ldots, \tilde{x}_n)$ of nonnegative real numbers with $\tilde{x}_0+\tilde{x}_1+\ldots+\tilde{x}_n=|\mathcal{C}|$.
We need to prove that, if $\mathcal{C}$ has distance $d$, then $\mathbf{\tilde{x}}$ is a feasible solution of Delsarte's linear program. Since the objective value of $\mathbf{\tilde{x}}$ is $|\mathcal{C}|$, the optimal value of the program is then greater than or equal to the size of every code $\mathcal{C}$ with distance $d$, and hence to $A(n,d)$.

Given $\mathcal{C} \subseteq \{0,1\}^n$, we define each $\tilde{x}_i$ as the number of ordered pairs of words in $\mathcal{C}$ at distance $i$, divided by the total number of words in $\mathcal{C}$:
\begin{equation}
    \label{xtildei}
    \tilde{x}_i = \frac{1}{|\mathcal{C}|} | \{ (\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2:d_H(\mathbf{w}, \mathbf{w}')=i\}|, \quad i=0,1, \ldots,n.
\end{equation}
We compute $\tilde{x}_i$ by looking at $\mathcal{C}^2$, so we're counting ordered pairs of words at a certain distance $i$. A single pair of words cannot have two different distances, so each pair contributes to one and only one of the $\tilde{x}_i$ variables; since there are $|\mathcal{C}|^2$ pairs in total, $\tilde{x}_0+\tilde{x}_1+\ldots+\tilde{x}_n=\frac{|\mathcal{C}|^2}{|\mathcal{C}|}=|\mathcal{C}|$.
Furthermore, since every word has distance 0 only from itself, $\tilde{x}_0$ is always equal to $\frac{|\mathcal{C}|}{|\mathcal{C}|}=1$.
Finally, if our code $\mathcal{C}$ has distance $d$, then any two distinct words in it have distance at least $d$, which means $\tilde{x}_1 = \tilde{x}_2 = \ldots = \tilde{x}_{d-1}=0$.
\begin{example}
    Let's try computing $\mathbf{\tilde{x}}$ for a simple code.
    \begin{equation*}
        \mathcal{C} \; = \{000, 001, 101, 111\} \quad\quad |\mathcal{C}|=4 \quad\quad n=3
    \end{equation*}
    \begin{equation*}
    \begin{array}{l}
        \tilde{x}_0 = \frac{1}{4}|\{(000,000), (001, 001), (101, 101), (111, 111)\}|=\frac{4}{4}=1 \\\\
        \tilde{x}_1 = \frac{1}{4}|\{(000,001), (001,000), (001,101), (101, 001), (101, 111), (111, 101)\}|=\frac{6}{4}=\frac{3}{2}\\\\
        \tilde{x}_2 = \frac{1}{4}|\{(001,111), (111, 001), (000,101), (101, 000)\}|=\frac{4}{4}=1\\\\
        \tilde{x}_3 = \frac{1}{4}|\{(000,111), (111, 000)\}|=\frac{2}{4}=\frac{1}{2}\\\\
        \tilde{x}_0 + \tilde{x}_1 + \tilde{x}_2 + \tilde{x}_3 = 1 + \frac{3}{2} + 1 + \frac{1}{2} = 4 = |\mathcal{C}|
    \end{array}
    \end{equation*}
\end{example}
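
The distance distribution $\mathbf{\tilde{x}}$ is easy to compute; a sketch (reusing \texttt{hamming\_distance}):

\begin{verbatim}
from collections import Counter

def distance_distribution(C):
    """x_i = (# ordered pairs in C^2 at distance i) / |C|, i = 0..n."""
    n = len(C[0])
    counts = Counter(hamming_distance(w, wp) for w in C for wp in C)
    return [counts[i] / len(C) for i in range(n + 1)]

print(distance_distribution(["000", "001", "101", "111"]))
# -> [1.0, 1.5, 1.0, 0.5]
\end{verbatim}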
\subsection{The final constraint}
Let $\mathcal{C} \subseteq \{0,1\}^n$ be an arbitrary set, and let $\tilde{x}_i$ be defined as in (\ref{xtildei}) for $i=0,1,\ldots,n$.
Then, for $t=1,2,\ldots,n$,
\begin{equation*}
    \sum_{i=0}^{n}K_t(n,i) \tilde{x}_i \geq 0.
\end{equation*}
To prove this, let $I \subseteq \{1,2,\ldots n\}$ be a set of indices and let's define the restricted Hamming distance $d_H^I(\mathbf{w}, \mathbf{w}')$ as the number of indices $i \in I$ with $w_i \neq w_i'$:
\begin{equation}
    d_H^I(\mathbf{w}, \mathbf{w}') = |\{i \in I : w_i \neq w_i'\}| \quad\forall\; \mathbf{w}, \mathbf{w}' \in \mathcal{C}.
\end{equation}
Of course, this is a generalization of our previously defined Hamming distance, which is recovered when $I = \{1, 2, \ldots, n\}$.
\begin{example}
    \begin{equation*}
    \begin{array}{l}
        \mathbf{w} \;= (0010011) \\
        \mathbf{w}' = (1001101) \\\\
        I = \{1,3,7\} \quad\quad d_H^I(\mathbf{w}, \mathbf{w}') = 2.
    \end{array}
    \end{equation*}
\end{example}
We can also define a more general weight as the number of indices $i \in I$ such that $w_i=1$:
\begin{equation}
    |\mathbf{w}|_I = |\{i \in I : w_i = 1\}| \quad\forall\;\mathbf{w}\in\mathcal{C}.
\end{equation}
\begin{lemma}
    \label{even-geq-odd}
    If $I \subseteq \{1,2,\ldots n\}$ is a set of indices and $\mathcal{C} \subseteq \{0,1\}^n$, then the number of ordered pairs $(\mathbf{w}, \mathbf{w}') \in \mathcal{C}^2$ with even $d_H^I(\mathbf{w}, \mathbf{w}')$ is at least as large as the number of ordered pairs $(\mathbf{w}, \mathbf{w}') \in \mathcal{C}^2$ with odd $d_H^I(\mathbf{w}, \mathbf{w}')$.
    \begin{proof}
        First of all, let's prove that a pair of words $\mathbf{w}, \mathbf{w}' \in \mathcal{C}$ has even $d_H^I$ exactly if $|\mathbf{w}|_I$ and $|\mathbf{w}'|_I$ have the same parity. To keep the notation light we write the argument for $I=\{1,2,\ldots,n\}$, i.e., for $d_H$ and $|\cdot|$; the general case is identical, with every index restricted to $I$.

        We previously showed that $d_H(\mathbf{w},\mathbf{w}')=|\textbf{w} \oplus \textbf{w}'|$. We can also obtain the weight of $\textbf{w} \oplus \textbf{w}'$ by summing the number of 1's in each word and subtracting twice the number of positions where both words have a 1 (call it $D$ for brevity):

        \begin{equation*}
            d_H(\mathbf{w}, \mathbf{w}') = |\mathbf{w} \oplus \mathbf{w}'| = |\textbf{w}| + |\textbf{w}'| - 2D,
        \end{equation*}
        where $D=|\{i \in \{1,2,\ldots,n\}:w_i=w_i'=1\}|$.
        Since $2D$ is even, it doesn't change the parity of the result, which therefore depends only on the parities of $|\mathbf{w}|$ and $|\mathbf{w}'|$.
        Let's call $\mathcal{E}$ the set of all words in $\mathcal{C}$ with even weight and $\mathcal{O}$ the set of all words in $\mathcal{C}$ with odd weight, so that $\mathcal{E} \cup \mathcal{O} = \mathcal{C}$:
        \begin{equation*}
            \mathcal{E} = \{\mathbf{w} \in \mathcal{C} : |\mathbf{w}|\text{ is even}\}, \quad \mathcal{O} = \{\mathbf{w} \in \mathcal{C} : |\mathbf{w}|\text{ is odd}\}.
        \end{equation*}
        Then:
        \begin{itemize}
            \item for $(\mathbf{w}, \mathbf{w}'): \mathbf{w} \in \mathcal{E} \land \mathbf{w}' \in \mathcal{E}$, $d_H(\mathbf{w}, \mathbf{w}')$ is even;
            \item for $(\mathbf{w}, \mathbf{w}'): \mathbf{w} \in \mathcal{O} \land \mathbf{w}' \in \mathcal{O}$, $d_H(\mathbf{w}, \mathbf{w}')$ is even;
            \item for $(\mathbf{w}, \mathbf{w}'): \mathbf{w} \in \mathcal{E} \land \mathbf{w}' \in \mathcal{O}$, $d_H(\mathbf{w}, \mathbf{w}')$ is odd;
            \item for $(\mathbf{w}, \mathbf{w}'): \mathbf{w} \in \mathcal{O} \land \mathbf{w}' \in \mathcal{E}$, $d_H(\mathbf{w}, \mathbf{w}')$ is odd.
        \end{itemize}
        So the pairs of words with even distance are exactly the ones in the Cartesian products $\mathcal{E} \times \mathcal{E}=\mathcal{E}^2$ and $\mathcal{O} \times \mathcal{O}=\mathcal{O}^2$, while the pairs of words with odd distance are the ones in $\mathcal{E} \times \mathcal{O}$ and $\mathcal{O} \times \mathcal{E}$.
        Writing our thesis in these terms, we get the following chain of equivalent inequalities, the last of which is clearly always true:
        \begin{equation*}
        \begin{array}{l}
            |\mathcal{E}|^2+|\mathcal{O}|^2\geq |\mathcal{E}|\cdot|\mathcal{O}| + |\mathcal{O}|\cdot|\mathcal{E}|;\\\\
            |\mathcal{E}|^2+|\mathcal{O}|^2\geq 2\cdot|\mathcal{E}|\cdot|\mathcal{O}|;\\\\
            |\mathcal{E}|^2+|\mathcal{O}|^2 - 2\cdot|\mathcal{E}|\cdot|\mathcal{O}|\geq 0; \\\\
            (|\mathcal{E}| - |\mathcal{O}|)^2 \geq 0.
        \end{array}
        \end{equation*}
    \end{proof}
\end{lemma}
\begin{lemma}
    \label{sum-greater-zero-lemma}
    For every $\mathcal{C} \subseteq \{0,1\}^n$ and every $\mathbf{v}\in\{0,1\}^n$ we have
    \begin{equation}
        \label{sum-almost-here}
        \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}(-1)^{(\textbf{w} \oplus \textbf{w}')^T\mathbf{v}} \geq 0.
    \end{equation}
    \begin{proof}
        In Lemma \ref{even-geq-odd}, we proved that the number of pairs at even distance is at least as large as the number of pairs at odd distance. This means that their difference must be greater than or equal to zero:
        \begin{equation}
            \label{evens-odd-geq-zero}
            \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}(-1)^{d_H^I(\mathbf{w}, \mathbf{w}')} \geq 0.
        \end{equation}
        If we set $I=\{i:v_i=1\}$, then $d_H^I(\mathbf{w}, \mathbf{w}') = (\textbf{w} \oplus \textbf{w}')^T\mathbf{v}$: the scalar product with the vector $\mathbf{v}$ gives the same result as computing the Hamming distance restricted to the indices $i\in I$.

        This means we can replace the distance in (\ref{evens-odd-geq-zero}) with this product:
        \begin{equation*}
            \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}(-1)^{d_H^I(\mathbf{w}, \mathbf{w}')}
            =
            \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}(-1)^{(\textbf{w} \oplus \textbf{w}')^T\mathbf{v}} \geq 0.
        \end{equation*}
    \end{proof}
    \begin{example}
        Suppose $\mathbf{w}=(10010)$ and $\mathbf{w}'=(00111)$ with $\mathbf{v}=(10011)$.
        Then, $I = \{1,4,5\}$. Let's show that $(\textbf{w} \oplus \textbf{w}')^T\mathbf{v} = d_H^I(\mathbf{w}, \mathbf{w}') = 2$:
        \begin{equation*}
        \begin{array}{ll}
            (\textbf{w} \oplus \textbf{w}')^T\mathbf{v} & = [(10010) \oplus (00111)]^T \cdot (10011) \\
            & = (10101)^T \cdot (10011) \\
            & = 1+0+0+0+1 \\
            & = 2 = d_H^I(\mathbf{w}, \mathbf{w}').
        \end{array}
        \end{equation*}
    \end{example}

    There is also an alternative, algebraic proof of Lemma \ref{sum-greater-zero-lemma}:
    \begin{proof}[Proof (algebraic)]
        First of all, $(\mathbf{w}\oplus\mathbf{w}')^T\mathbf{v}$ and $(\mathbf{w}+\mathbf{w}')^T\mathbf{v}$ have the same parity (the ``modulo 2'' operation does not affect parity).

        Since we're only interested in the parity of the exponent, raising $-1$ to either of the two gives the same result:
        \begin{equation*}
            \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{(\mathbf{w}\oplus\mathbf{w}')^T\mathbf{v}} = \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{(\mathbf{w}+\mathbf{w}')^T\mathbf{v}}.
        \end{equation*}

        At this point, we can use the distributivity of the scalar product over addition to write:
        \begin{equation*}
            \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{(\mathbf{w}+\mathbf{w}')^T\mathbf{v}} =
            \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{\mathbf{w}^T\mathbf{v}+\mathbf{w}'^T\mathbf{v}}.
        \end{equation*}

        Now, we can split the power:
        \begin{equation*}
            \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{\mathbf{w}^T\mathbf{v}+\mathbf{w}'^T\mathbf{v}} =
            \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{\mathbf{w}^T\mathbf{v}}\cdot (-1)^{\mathbf{w}'^T\mathbf{v}}.
        \end{equation*}

        The double sum now factors into the product of two identical sums, i.e., a square:
        \begin{equation*}
            \sum\limits_{(\mathbf{w},\mathbf{w}')\in\mathcal{C}^2}(-1)^{\mathbf{w}^T\mathbf{v}}\cdot (-1)^{\mathbf{w}'^T\mathbf{v}} = \left(\sum\limits_{\mathbf{w}\in\mathcal{C}}(-1)^{\mathbf{w}^T\mathbf{v}}\right)^2 \geq 0,
        \end{equation*}
        which is always greater than or equal to zero, being a square.
    \end{proof}
\end{lemma}
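
Both proofs can be checked numerically; a small sketch (reusing the \texttt{xor} helper from earlier) that evaluates the sum for every $\mathbf{v}$ on a sample code:

\begin{verbatim}
from itertools import product

def sign_sum(C, v):
    """Sum of (-1)^((w XOR w')^T v) over all ordered pairs in C^2."""
    dot = lambda u, v: sum(a * b for a, b in zip(u, v))
    return sum((-1)**dot(xor(w, wp), v) for w in C for wp in C)

C = [(0, 0, 0), (0, 0, 1), (1, 0, 1), (1, 1, 1)]
# The sum is nonnegative for every choice of v:
print(min(sign_sum(C, v) for v in product((0, 1), repeat=3)))  # -> 0
\end{verbatim}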

\begin{lemma}
    Now, the only thing left to prove is:
    \begin{equation*}
        \sum\limits_{i=0}^{n}K_t(n,i)\tilde{x}_i\geq 0 \quad\forall\; t\in\{1,2,\ldots,n\}.
    \end{equation*}

    \begin{proof}
        We start by summing the inequality in (\ref{sum-almost-here}) over all $\mathbf{v}\in \{0,1\}^n$ of weight $t$:
        \begin{equation*}
            \sum_{\mathbf{v}\in \{0,1\}^n:|\mathbf{v}|=t}\;\sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}(-1)^{(\textbf{w} \oplus \textbf{w}')^T\mathbf{v}} \geq 0;
        \end{equation*}
        then, we change the summation order:
        \begin{equation*}
            \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}\;\sum_{\mathbf{v}\in \{0,1\}^n:|\mathbf{v}|=t}(-1)^{(\textbf{w} \oplus \textbf{w}')^T\mathbf{v}} \geq 0.
        \end{equation*}

        At this point, let us put $\mathbf{u} = \textbf{w} \oplus \textbf{w}'$ and re-write the inequality as:
        \begin{equation}
            \label{s-u-inequality}
            \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}S(\mathbf{u}) \geq 0,
        \end{equation}
        where:
        \begin{equation*}
            S(\mathbf{u})=\sum_{\mathbf{v}\in \{0,1\}^n:|\mathbf{v}|=t}(-1)^{\mathbf{u}^T\mathbf{v}}.
        \end{equation*}

        In the sum defining $S(\mathbf{u})$, the vectors $\mathbf{v}$ with $\mathbf{u}^T\mathbf{v}=j$ are counted with sign $(-1)^j$, so let's count the vectors $\mathbf{v}$ of weight $t$ with $\mathbf{u}^T\mathbf{v}=j$.

        By definition, $|\mathbf{u}|=d_H(\mathbf{w}, \mathbf{w}') = i$ is the number of 1's in $\mathbf{u}$. To obtain a valid vector $\mathbf{v}$, we need to put $j$ of its 1's in positions where $\mathbf{u}$ has a 1 (so that $\mathbf{u}^T\mathbf{v}=j$) and the remaining $t-j$ 1's in positions where $\mathbf{u}$ has a 0.
        Hence, the number of these $\mathbf{v}$ is ${i \choose j}{n-i \choose t-j}$. Then,

        \begin{equation*}
            S(\mathbf{u})=\sum_{j=0}^{\min(i,t)}(-1)^j{i \choose j}{n-i \choose t-j},
        \end{equation*}
        which is the Krawtchouk polynomial of degree $t$: $K_t(n,i)$.

        We can now go back to (\ref{s-u-inequality}), which becomes:
        \begin{equation*}
            \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}S(\mathbf{u}) = \sum_{(\mathbf{w}, \mathbf{w}')\in\mathcal{C}^2}K_t(n,d_H(\mathbf{w}, \mathbf{w}')) \geq 0.
        \end{equation*}
        For each $i$, the term $K_t(n,i)$ appears in this sum exactly $|\mathcal{C}|\cdot \tilde{x}_i$ times, because $|\mathcal{C}|\cdot\tilde{x}_i$ is the number of pairs $(\mathbf{w}, \mathbf{w}')$ at distance $i$.

        We obtain the thesis by grouping the pairs by distance and dividing by $|\mathcal{C}|>0$:
        \begin{equation*}
            \sum_{i=0}^{n}K_t(n,i)\cdot\tilde{x}_i \geq 0.
        \end{equation*}
    \end{proof}
\end{lemma}
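
As a closing sanity check, we can verify this constraint on the example code from before, reusing the hypothetical \texttt{krawtchouk} and \texttt{distance\_distribution} helpers sketched earlier:

\begin{verbatim}
C, n = ["000", "001", "101", "111"], 3
x = distance_distribution(C)  # [1.0, 1.5, 1.0, 0.5]

# Every Delsarte constraint holds for the distance distribution of a code.
for t in range(1, n + 1):
    s = sum(krawtchouk(t, n, i) * x[i] for i in range(n + 1))
    print(t, s >= 0)  # -> True for every t
\end{verbatim}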