123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- % Copyright (C) 2017 Koz Ross <koz.ross@retro-freedom.nz>
- % This work is licensed under the Creative Commons Attribution-ShareAlike 4.0
- % International License. To view a copy of this license, visit
- % http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative
- % Commons, PO Box 1866, Mountain View, CA 94042, USA.
- \documentclass{beamer}
- \usepackage[utf8]{inputenc}
- \usecolortheme{seahorse}
- \usefonttheme{professionalfonts}
- \usepackage{fontspec}
- \setmainfont{DejaVu Sans}
- \setbeamertemplate{navigation symbols}{}
- \usepackage{caption}
- \usepackage[font=itshape, begintext=``, endtext='']{quoting}
- \usepackage{fancyvrb}
- \usepackage{color}
- \usepackage{booktabs}
- \usepackage[noend]{algpseudocode}
- \title{CONS cells}
- \subtitle{Or: start simple!}
- \titlegraphic{\includegraphics[scale=0.8]{img/logo.pdf}}
- \author{Glen Osborne}
- \date{3rd August, 2017}
- \begin{document}
- \begin{frame}[c]
- \titlepage{}
- \end{frame}
- \begin{frame}[c]
- \frametitle{Outline}
- \tableofcontents
- \end{frame}
- \section{Introduction}
- \begin{frame}[c]
- \frametitle{All those data structures\ldots}
- We have an {\em embarassment\/} of different data structures available to
- us:\pause{}
- \begin{itemize}
- \item Lists\pause{}
- \item Dictionaries\pause{}
- \item Matrices (with arbitrary dimensions)\pause{}
- \item Trees\pause{}
- \item Graphs\pause{}
- \item And so many more!\pause{}
- \end{itemize}
- These can be {\em very\/} complicated to understand and implement!\pause{}
- They also often `lock you in' to a fixed set of operations --- and whether
- these are the ones you need can be hard to determine when initially solving a
- problem.
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{A `novel' proposition}
- \begin{center}
- \includegraphics[scale=0.5]{img/morpheus.pdf}
- \end{center}\pause{}
- Yes, really!\pause{} Not a new idea by any means --- first put forward in the
- 19{\em 50\/}s.\pause{} Let's see how this is possible\ldots
- \end{frame}
- \section{The CONS cell}
- \begin{frame}[c]
- \frametitle{Some definitions}
- \pause{}
- \begin{definition}
- A {\em machine word\/} is a fixed-size chunk of memory.\pause{}
- \end{definition}
-
- \medskip
- What this `fixed size' is doesn't matter --- most modern computers have a 32 or
- 64-bit machine word.\pause{}
- \medskip
- \begin{definition}
- A piece of data ({\em datum\/}) is {\em primitive\/} if it fits into one machine
- word. Otherwise, it is {\em complex}.\pause{}
- \end{definition}
- \medskip
- \begin{definition}
- A {\em reference\/} is a primitive datum, consisting of a memory address.
- \end{definition}\pause{}
-
- \medskip
- We can imagine a reference as `pointing' to another piece of data in
- memory.\pause{} Besides references, we also have integers and floats as
- primitive data (and possibly others as well).
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{Gettin' visual with it}
- \begin{overprint}
- \onslide<1>\centerline{\includegraphics[scale=0.3]{img/visual-basic.pdf}}%
- \onslide<2>\centerline{\includegraphics[scale=0.3]{img/visual-box.pdf}}%
- \onslide<3>\centerline{\includegraphics[scale=0.3]{img/visual-data.pdf}}%
- \onslide<4>\centerline{\includegraphics[scale=0.3]{img/visual-reference.pdf}}%
- \end{overprint}
- \vspace{1cm}
- \begin{overprint}
- \onslide<1>We will use drawings like this one to represent data in memory.
- \onslide<2>We represent machine words using boxes. Adjacent boxes are
- adjacent machine words in memory (these two are not).
- \onslide<3>Non-reference primitive data will be drawn inside the box representing its
- machine word.
- \onslide<4>References will be drawn as arrows to the data they `point' to
- (this one isn't pointing anywhere useful).
- \end{overprint}
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{The CONS cell}
- \centerline{\includegraphics[scale=0.3]{img/cons-cell.pdf}}\pause{}
- \medskip
- \begin{itemize}
- \item Consists of two adjacent machine words\pause{}
- \item A CONS cell's first word is called its {\em CAR\/} (pronounced
- like `motor vehicle')\pause{}
- \item A CONS cell's second word is called its {\em CDR\/} (pronounced
- like `COULD-ruh')\pause{}
- \item Either the CAR or the CDR can store reference, or primitive
- non-reference, data as needed\pause{}
- \item Can implement {\em every single one\/} of the data structures
- mentioned at the start of this talk!
- \end{itemize}
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{What you're probably thinking right now}
- \only<1>{\centerline{\includegraphics[scale=0.15]{img/doge.pdf}}}
- \only<2>{\centerline{\includegraphics[scale=0.14]{img/ducreux.pdf}}}
- \end{frame}
- \section{Building up other structures}
- \begin{frame}[c]
- \frametitle{Lists}
-
- \begin{definition}
- {\em NIL\/} refers to a unique machine word representing `no useful data'.
- \end{definition}\pause{}
- \medskip
- Initially, we'll assume that lists only store primitive data. We'll later show
- how to overcome this.\pause{}
- \medskip
- The intuition here is based on the fact that a CONS cell looks a lot like a
- singly-linked list node.\pause{} More precisely, we can store each node's {\em
- data\/} in the CAR, and {\em `next' reference\/} in the CDR.\pause{} For
- the last node, we can have its `next' reference `point to' NIL to mark the
- end of the list.\pause{}
- \medskip
- Using this approach, we can define all the usual list operations in a
- straightforward way.
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{Visualizing CONS cell lists}
- \begin{overprint}
- \onslide<1>\centerline{\includegraphics[scale=0.2]{img/list-basic.pdf}}%
- \onslide<2>\centerline{\includegraphics[scale=0.2]{img/list-conses.pdf}}%
- \onslide<3>\centerline{\includegraphics[scale=0.2]{img/list-cars.pdf}}%
- \onslide<4>\centerline{\includegraphics[scale=0.2]{img/list-cdrs.pdf}}%
- \end{overprint}
- \vspace{2cm}
- \onslide<1->This is the in-memory representation of the list {\tt [10, 20,
- 30]} using CONS cells. \onslide<2->Each CONS cell is one list node.
- \onslide<3->CARs contain the data. \onslide<4->CDRs contain references to
- the next list node, or NIL for the end.
- \end{frame}
- \begin{frame}[c]
- \frametitle{Lists with complex data}
- \begin{itemize}
- \item Most interesting data won't fit into a single machine word.\pause{}
- \item To have lists storing complex data, we have the CARS of our list node
- CONS cells be {\em references\/} to the data elsewhere in memory.\pause{}
- \item We can even mix-and-match the two!
- \end{itemize}
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{Visualizing lists with complex data}
- \begin{overprint}
- \onslide<1>\centerline{\includegraphics[scale=0.2]{img/list-complex.pdf}}%
- \onslide<2>\centerline{\includegraphics[scale=0.2]{img/list-complex-prim.pdf}}%
- \onslide<3>\centerline{\includegraphics[scale=0.2]{img/list-complex-comp.pdf}}%
- \end{overprint}
- \vspace{1cm}
- \onslide<1->This is the in-memory representation of the list {\tt ["My", 10,
- "sons"]} using CONS cells. \onslide<2->Primitive data is stored in the CAR of
- the relevant cell as before. \onslide<3->Complex data is stored elsewhere, and
- the CDRs of the relevant cells store references to that data instead.
- \end{frame}
- \begin{frame}[c]
- \frametitle{Dictionaries}\pause{}
- \begin{definition}
- A {\em dictionary\/} stores a set of key-value pairs, such that keys in the
- dictionary are unique.
- \end{definition}\pause{}
-
- \medskip
- We can represent a key-value pair by using a CONS cell where both the CAR and
- CDR store data (or a reference to data).\pause{} We can then create a list of
- these as complex data.\pause{} To ensure that we don't end up with key
- duplication, we do two things:\pause{}
- \medskip
- \begin{itemize}
- \item Ensure that queries always start from the first CONS cell in the
- list\pause{}
- \item Put inserts at the front of the list (so they'll be found before any
- previous entries with the same key)
- \end{itemize}
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{Example of CONS cell-based dictionary}
- \begin{overprint}
- \onslide<1>\centerline{\includegraphics[scale=0.2]{img/dictionary-basic.pdf}}%
- \onslide<2>\centerline{\includegraphics[scale=0.2]{img/dictionary-spine.pdf}}%
- \end{overprint}
- \vspace{0.5cm}
- \onslide<1->This is the in-memory representation of the dictionary $\{(10,
- \text{"foo"}), (\text{"bar"}, 20), (\text{"baz"},\text{"quux"})\}$.
- \onslide<2->The entries form a list of complex data (the {\em spine\/} of
- the dictionary).
- \end{frame}
- \begin{frame}[c]
- \frametitle{A slight intermission}
- Given that we've now described how to define dictionaries using CONS cells, we
- have the ability to define any other data structure (as explained in the
- previous talk).\pause{} However, we can implement some structures a bit better
- than this using CONS cells, including:\pause{}
- \medskip
- \begin{itemize}
- \item Matrices\pause{}
- \item Trees\pause{}
- \item Graphs\pause{}
- \end{itemize}
- \medskip
- To save time, we will only draw diagrams where
- useful.\pause{} If we refer to lists or dictionaries anywhere, assume we mean
- ones based on CONS cells as described previously.
- \end{frame}
- \begin{frame}[c]
- \frametitle{Matrices}
- \begin{definition}
- The {\em rank\/} of a matrix is the number of dimensions it has.
- \end{definition}\pause{}
- \medskip
- A rank $1$ `matrix' is just a list.\pause{} A rank
- $2$ matrix is a list of lists, a rank $3$ matrix is a list of rank $2$
- matrices, and so on, and so forth.\pause{}
- \medskip
- We can define a rank $n$ matrix (for $n > 1$) as a spine of rank $n -
- 1$ matrices.
- \end{frame}
- \begin{frame}[c]
- \frametitle{Matrix example}
- Consider the following rank $2$ matrix:\pause{}
- \[
- \begin{bmatrix}
- 1 & 2 & 3 & 4\\
- 5 & 6 & 7 & 8\\
- 9 & 10 & 11 & 12\\
- \end{bmatrix}
- \]
- \medskip
- We can represent it in two ways.\pause{} We can have the spine go along the columns:
-
- \medskip
- {\tt [[1, 5, 9], [2, 6, 10], [3, 7, 11], [4, 8, 12]]}\pause{}
- \medskip
- \noindent{}or along the rows:
- \medskip
- {\tt [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]}
- \end{frame}
- \begin{frame}[c]
- \frametitle{Trees}
- We will begin with a very simple kind of tree: a {\em binary leaf
- tree}.\pause{} More precisely:
- \medskip
- \begin{definition}
- A {\em binary leaf tree\/} has data only in its leaf nodes, and its internal
- nodes have a maximum of two children.\pause{}
- \end{definition}
- \medskip
- We can represent a binary leaf tree using CONS cells as follows:\pause{}
- \medskip
- \begin{itemize}
- \item An internal node is represented by a CONS cell whose CAR stores a
- reference to its left child (or NIL if none) and whose CDR stores a
- reference to its right child (or NIL if none)\pause{}
- \item A leaf node is represented by a CONS cell storing the same data (or
- reference) in both its CAR and CDR
- \end{itemize}
- \end{frame}
- \begin{frame}[c, fragile]
- \frametitle{Example binary leaf tree and its CONS cell representation}
- \begin{columns}
- \column{.5\textwidth}
- \centerline{\includegraphics[scale=0.6]{img/leaf-tree.pdf}}%
- \column{.5\textwidth}
- \centerline{\includegraphics[scale=0.15]{img/leaf-tree-cons.pdf}}%
- \end{columns}
- \end{frame}
- \begin{frame}[c]
- \frametitle{Rose trees}
- If we need trees of arbitrary branching factor (number of children), or where
- data has to be stored in the leaves as well, we can instead use a {\em rose
- tree\/} representation:\pause{}
- \medskip
- \begin{itemize}
- \item A leaf node is represented the same way as for binary leaf
- trees\pause{}
- \item An internal node is represented by a CONS cell whose CAR stores the
- data for that node (or a reference to the data), and whose CDR stores a
- list of references to its children in left-to-right order\pause{}
- \end{itemize}
- \medskip
- We can also include a parent reference in the list of children (typically as
- the first element) if we want.
- \end{frame}
- \begin{frame}[c]
- \frametitle{Graphs}
- Typically, graphs are stored as an {\em adjacency matrix\/} or an {\em
- adjacency list}.\pause{} The adjacency matrix is easy to represent using a
- rank $2$ matrix as we defined before.\pause{} The adjacency list is a bit
- trickier:\pause{}
- \medskip
- \begin{itemize}
- \item An adjacency list is represented by an {\em adjacency spine\/}\pause{}
- \item An adjacency spine is made up of {\em vertex cells\/}\pause{}
- \item A vertex cell is a CONS cell whose CAR stores a reference to the next
- vertex cell in the adjacency spine, and whose CDR stores a {\em vertex
- list}\pause{}
- \item A vertex list is a CONS cell whose CAR stores the vertex's label (or a
- reference to it), and whose CDR stores a reference to a {\em child
- list\/}\pause{}
- \item A child list's data are references to elements of the adjacency spine,
- corresponding to the neighbours of that vertex
- \end{itemize}
- \end{frame}
- \section{Limitations and uses}
- \begin{frame}[c]
- \frametitle{The bad news}
- All of these are really neat, but unfortunately, these implementations can be
- slow, especially when we have a lot of data, because:\pause{}
- \medskip
-
- \begin{itemize}
- \item Many operations involve scanning `chains' of CONS cells, which give us
- linear (or worse!) time complexity\pause{}
- \item CONS cells are not cache-friendly; typically, we'll only cache the
- cell itself, and {\em not\/} any data its CAR or CDR might hold references
- to\pause{}
- \item This means that we could potentially be getting cache misses {\em
- every time\/} we follow a stored reference!\pause{}
- \item There are ways to make CONS cell-based structures more cache-friendly,
- but they're usually more trouble than they're worth
- \end{itemize}
- \end{frame}
- \begin{frame}[c]
- \frametitle{So why use them?}
- \begin{itemize}
- \item When we initially set out to solve a problem, we don't often know what
- operations we need, how much data we'll have, or which operations need to
- be fast\pause{}
- \item CONS cell-based structures can be built and {\em extended\/}
- quickly\pause{}
- \item This makes them {\em ideal\/} for prototyping\pause{}
- \item There is no {\em fast\/} --- only fast {\em enough\/}; you might find CONS cells are
- good enough for your purposes!\pause{}
- \item Once you know exactly what you need, you can always replace some (or
- all) of the structure with something more efficient\pause{}
- \end{itemize}
- \medskip
- In short, CONS cell-based structures can help you {\em learn your tradeoffs\/}
- quickly and easily.
- \end{frame}
- \section{Questions}
- \begin{frame}[fragile, c]
- \frametitle{Questions?}
- \begin{center}
- \includegraphics[scale=0.27]{img/questions.pdf}
- \end{center}
- \end{frame}
- \end{document}
|