talk.tex 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. % Copyright (C) 2017 Koz Ross <koz.ross@retro-freedom.nz>
  2. % Copyright (C) 2008 Karta24
  3. % This work is licensed under the Creative Commons Attribution-ShareAlike 4.0
  4. % International License. To view a copy of this license, visit
  5. % http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative
  6. % Commons, PO Box 1866, Mountain View, CA 94042, USA.
  7. % Karta's attribution applies to the image '2-generals-dead-messenger.svg', per
  8. % the requirements of the CC-BY-SA license.
  9. \documentclass[style=fyma]{powerdot}
  10. \usepackage{enumitem}
  11. \usepackage{graphicx}
  12. \usepackage{amsmath}
  13. \usepackage{amsfonts}
  14. \usepackage{amsthm}
  15. \usepackage[font={itshape},begintext={``},endtext={''}]{quoting}
  16. \newtheorem*{theorem}{Theorem}
  17. \newtheorem*{observation}{Observation}
  18. \pdsetup{theslide=,
  19. palette=gray}
  20. \begin{document}
  21. \begin{slide}[toc=]{}
  22. \begin{center}
  23. {\LARGE \textbf{The Two Generals Problem} \par}
  24. {\Large Or: Why distributed systems are Nintendo Hard \par}
  25. \vspace{1cm}
  26. \includegraphics[scale=0.75]{logo.eps}
  27. \vspace{1cm}
  28. \linebreak
  29. Koz Ross
  30. \medskip
  31. March 16, 2017
  32. \end{center}
  33. \end{slide}
  34. \begin{slide}[toc=]{Overview}
  35. \tableofcontents[content=sections]
  36. \end{slide}
  37. \section{Introduction to distributed systems}
  38. \begin{slide}{What is a distributed system?}
  39. A system is {\em distributed\/} if it has: \pause{}
  40. \begin{itemize}
  41. \item Multiple processing units (so we can do things `at the same time')\pause{}
  42. \item Independent failure (one unit failing shouldn't bring down the whole
  43. system)\pause{}
  44. \item Unreliable communication (information about other units is limited and
  45. uncertain, connections between units can fail unpredictably)\pause{}
  46. \end{itemize}
  47. Systems which are not distributed are called otherwise, {\em singular\/} (or
  48. {\em centralized\/} in some contexts).
  49. \end{slide}
  50. \begin{slide}{Examples of distributed systems}
  51. {\bf Example 1:} Social networks (like Facebook, Twitter, etc)\pause{}
  52. \medskip
  53. \begin{itemize}
  54. \item Multiple processing units (client apps, web browsers, servers)\pause{}
  55. \item Independent failure (if your computer catches fire, everyone else can
  56. still tell each other what they had for breakfast today)\pause{}
  57. \item Unreliable communication (timelines can go out of sync, trending posts
  58. can vary, updates might be lost or take a long time)
  59. \end{itemize}
  60. \end{slide}
  61. \begin{slide}[toc=]{Examples of distributed systems}
  62. {\bf Example 2:} Torrents (legitimate or otherwise)\pause{}
  63. \medskip
  64. \begin{itemize}
  65. \item Multiple processing units (everyone can seed or download different
  66. things, or parts of them, independently, at the same time)\pause{}
  67. \item Independent failure (if a single peer disconnects or runs slowly,
  68. everyone else can still download or seed)\pause{}
  69. \item Unreliable communication (peers randomly join and leave, network
  70. failure to particular peers, slow connections$\ldots$)
  71. \end{itemize}
  72. \end{slide}
  73. \begin{slide}[toc=]{Examples of distributed systems}
  74. {\bf Example 3:} Human society (at any scale)\pause{}
  75. \medskip
  76. \begin{itemize}
  77. \item Multiple processing units (me teaching a class at 10am happens whether
  78. or not you care or show up)\pause{}
  79. \item Independent failure (society doesn't stop because one person gets sick
  80. or dies)\pause{}
  81. \item Unreliable communication (every sitcom ever$\ldots$)\pause{}
  82. \end{itemize}
  83. This shows that distributed systems don't just apply to computing!
  84. \end{slide}
  85. \begin{slide}{Why do we care?}
  86. \pause{}
  87. \begin{itemize}
  88. \item Distributed systems are {\em useful\/}\pause{}
  89. \item Distributed systems are {\em everywhere\/}\pause{}
  90. \item Distributed systems are {\em weird\/} (and thus, Nintendo Hard)
  91. \pause{}
  92. \end{itemize}
  93. Whatever you plan to do, distributed systems, their creation, maintenance and
  94. use {\em will\/} be {\em your\/} problem.\pause{} Understanding their
  95. weirdness is essential to making them behave and do what you want.\pause{}
  96. \medskip
  97. The alternative to understanding distributed systems is {\em awful\/} (look at AWS
  98. and Github, and that's just recently!).
  99. \end{slide}
  100. \section{The Two Generals Problem}
  101. \begin{slide}{Problem overview}
  102. \begin{center}
  103. \includegraphics[scale=0.4]{2-generals.eps}
  104. \end{center}
  105. \pause{}
  106. \vspace{1cm}
  107. Both generals {\em must\/} attack at the same time if they want to take the
  108. city.\pause{} However, they haven't agreed on a time before they set up camp.
  109. \end{slide}
  110. \begin{slide}[toc=]{Problem overview}
  111. \begin{center}
  112. \includegraphics[scale=0.4]{2-generals-messenger.eps}
  113. \end{center}
  114. \vspace{1cm}
  115. The only way the generals can communicate with each other is by sending
  116. messengers between their camps.\pause{} In order for the messengers to get
  117. there in a timely manner, they must run through the city.
  118. \end{slide}
  119. \begin{slide}[toc=]{Problem overview}
  120. \begin{center}
  121. \includegraphics[scale=0.4]{2-generals-dead-messenger.eps}
  122. \end{center}
  123. \vspace{1cm}
  124. Some messengers won't make it.\pause{} Thus, neither general knows which of
  125. their messages got delivered, or what the chance of any given message getting
  126. delivered is.
  127. \end{slide}
  128. \begin{slide}[toc=]{Problem overview}
  129. \begin{center}
  130. \includegraphics[scale=0.4]{2-generals-dead-messenger.eps}
  131. \end{center}
  132. \vspace{1cm}
  133. {\bf Question:} Is it possible for both generals to agree on an attack time
  134. with {\em total\/} certainty?\pause{} {\em No.}
  135. \end{slide}
  136. \begin{slide}{Preliminaries}
  137. A {\em message\/} is some $x \in \mathbb{N}$.\pause{} An {\em outbox\/} is a list
  138. of sent messages, in the order they were sent.\pause{}
  139. \medskip
  140. Each general $G$ has an outbox $\mathrm{out}(G)$, and a {\em decision\/}
  141. $d(G)$, where $d(G) \in \mathbb{N} \cup \{\mathrm{undef}\}$.\pause{}
  142. Initially, any general $G$ has $\mathrm{out}(G) = \emptyset$ and $d(G) =
  143. \mathrm{undef}$.\pause{} Each general is only aware of their own outbox and
  144. decision at any given time.\pause{}
  145. \medskip
  146. We also define a {\em success probability\/} $P \in (0, 1)$.\pause{} This
  147. can change as messages get sent.\pause{} Neither of the generals know anything
  148. about $P$ beyond these two facts.\pause{}
  149. \medskip
  150. We will refer to our generals as $C$ and $L$ (for no particular reason
  151. whatsoever).
  152. \end{slide}
  153. \begin{slide}[toc=]{Preliminaries}
  154. A general $G$ can send a message $m$ to another general $G^{\prime}$. To do
  155. this, we add $m$ to the end of $\mathrm{out}(G)$.\pause{} Then, we generate
  156. a random $p \in (0,1)$ and compare it to $P$: if $p \leq P$, then the
  157. message {\em arrives}, otherwise, it is {\em lost}.\pause{}
  158. \medskip
  159. If $m$ arrives, we set $d(G^{\prime}) = m$ (the generals are genuine and
  160. don't want to sabotage each other).\pause{}
  161. \medskip
  162. We say that our generals have {\em reached agreement\/} if:
  163. \begin{itemize}
  164. \item $d(C) \neq \mathrm{undef}$ and $d(L) \neq \mathrm{undef}$; and
  165. \item $d(C) = d(L)$
  166. \end{itemize}
  167. \end{slide}
  168. \begin{slide}{Problem statement}
  169. \begin{theorem}
  170. There is no $\mathrm{out}(C),\mathrm{out}(L)$ such that $C, L$ are
  171. guaranteed to reach agreement with probability 1.
  172. \end{theorem}\pause{}
  173. \bigskip
  174. Note that this does {\em not\/} claim that we cannot {\em ever\/} reach
  175. agreement --- only that we can't be certain that we will, given only the
  176. outboxes of both generals.
  177. \end{slide}
  178. \begin{slide}{Proof}
  179. \begin{observation}
  180. If $\mathrm{out}(C) = \mathrm{out}(L) = \emptyset$, then we cannot have
  181. reached agreement.
  182. \end{observation}\pause{}
  183. This is intentional: if nobody's sent any messages, clearly nobody's made any
  184. decisions, and thus, nobody's agreed to anything.
  185. \end{slide}
  186. \begin{slide}[toc=]{Proof}
  187. \begin{proof}
  188. Suppose for the sake of a contradiction that there exist some
  189. $\mathrm{out}(C), \mathrm{out}(L)$ such that we can guarantee $C,L$ reaching
  190. agreement with probability 1. Consider some message $m$ in either outbox, as
  191. well as $P$ at the time $m$ was sent.\pause{}
  192. \medskip
  193. As $P \neq 1$, the probability that $m$ was lost is $1 - P \neq 0$.\pause{} As
  194. after sending each message in $\mathrm{out}(C), \mathrm{out}(L)$, we are
  195. guaranteed to reach agreement with probability 1, $m$ arriving cannot have
  196. been essential for reaching agreement.\pause{} Thus, even if we don't send $m$,
  197. we will still reach agreement with probability 1.\pause{}
  198. \medskip
  199. As $m$ is an arbitrary message, it follows that we can reach agreement when
  200. $\mathrm{out}(C) = \mathrm{out}(L) = \emptyset$.\pause{} However, this is a
  201. contradiction, as by definition, this is impossible. Thus, no such
  202. $\mathrm{out}(C), \mathrm{out}(L)$ can exist.
  203. \end{proof}
  204. \end{slide}
  205. \section{Consequences}
  206. \begin{slide}{For computer scientists}
  207. \begin{itemize}
  208. \item Whenever we need {\em any\/} agreement on state between components in
  209. a distributed system, we are going to have a bad time.\pause{}
  210. \item Even something as simple as {\em shared clocks\/} become an
  211. issue!\pause{}
  212. \item Directly leads to two famous results: the FLP impossibility theorem
  213. (Fischer, Lynch and Paterson, 1985) and the CAP theorem (Brewer,
  214. 1998).\pause{}
  215. \item A lot of work on singular systems is next-to-worthless for distributed
  216. ones.\pause{}
  217. \item There are lots of ways to obtain coherency and agreement in
  218. distributed systems, but {\em none\/} will be as perfect as a singular one
  219. (although you might not care in some cases).
  220. \end{itemize}
  221. \end{slide}
  222. \begin{slide}{For practitioners}
  223. \begin{itemize}
  224. \item When we need agreement or synchronization, there {\em will\/} be serious
  225. costs that have to be considered.\pause{}
  226. \item If you see (or need to be involved with) any distributed system with heavy
  227. amounts of global information that must be kept coherent, {\em
  228. run}.\pause{}
  229. \item Know your tradeoffs --- if you can avoid needing a lot of
  230. synchronization or information sharing, definitely avoid it, or at least
  231. limit its impact.\pause{}
  232. \item Understand what the computer scientists have said about distributed
  233. systems --- it's not nearly as theoretical as you think, and it might save
  234. your job one day.
  235. \end{itemize}
  236. \end{slide}
  237. \begin{slide}{A final thought}
  238. \vspace{1cm}
  239. \begin{quoting}
  240. You know you have a distributed system when the crash of a computer you've
  241. never heard of stops you from getting any work done.
  242. \end{quoting}
  243. Leslie Lamport
  244. \end{slide}
  245. \section{Questions}
  246. \end{document}