aut-csc
/
cons-cells


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
							% Copyright (C) 2017 Koz Ross <koz.ross@retro-freedom.nz>

% This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 
% International License. To view a copy of this license, visit 
% http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative 
% Commons, PO Box 1866, Mountain View, CA 94042, USA.

\documentclass{beamer}

\usepackage[utf8]{inputenc}
\usecolortheme{seahorse}
\usefonttheme{professionalfonts}
\usepackage{fontspec}
\setmainfont{DejaVu Sans}
\setbeamertemplate{navigation symbols}{}
\usepackage{caption}
\usepackage[font=itshape, begintext=``, endtext='']{quoting}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage{booktabs}
\usepackage[noend]{algpseudocode}

\title{CONS cells}
\subtitle{Or: start simple!}
\titlegraphic{\includegraphics[scale=0.8]{img/logo.pdf}}
\author{Glen Osborne}
\date{3rd August, 2017}

\begin{document}

\begin{frame}[c]
  \titlepage{}
\end{frame}

\begin{frame}[c]
  \frametitle{Outline}
  \tableofcontents
\end{frame}

\section{Introduction}

\begin{frame}[c]
  \frametitle{All those data structures\ldots}

  We have an {\em embarassment\/} of different data structures available to
  us:\pause{}

  \begin{itemize}
    \item Lists\pause{}
    \item Dictionaries\pause{}
    \item Matrices (with arbitrary dimensions)\pause{}
    \item Trees\pause{}
    \item Graphs\pause{}
    \item And so many more!\pause{}
  \end{itemize}

  These can be {\em very\/} complicated to understand and implement!\pause{}
  They also often `lock you in' to a fixed set of operations --- and whether
  these are the ones you need can be hard to determine when initially solving a
  problem.
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{A `novel' proposition}
  \begin{center}
    \includegraphics[scale=0.5]{img/morpheus.pdf}
  \end{center}\pause{}

  Yes, really!\pause{} Not a new idea by any means --- first put forward in the
  19{\em 50\/}s.\pause{} Let's see how this is possible\ldots
\end{frame}

\section{The CONS cell}

\begin{frame}[c]
  \frametitle{Some definitions}
  \pause{}
  \begin{definition}
    A {\em machine word\/} is a fixed-size chunk of memory.\pause{}
  \end{definition}
  
  \medskip

  What this `fixed size' is doesn't matter --- most modern computers have a 32 or
  64-bit machine word.\pause{}

  \medskip

  \begin{definition}
    A piece of data ({\em datum\/}) is {\em primitive\/} if it fits into one machine
    word. Otherwise, it is {\em complex}.\pause{}
  \end{definition}

  \medskip

  \begin{definition}
    A {\em reference\/} is a primitive datum, consisting of a memory address.
  \end{definition}\pause{}
  
  \medskip

  We can imagine a reference as `pointing' to another piece of data in
  memory.\pause{} Besides references, we also have integers and floats as
  primitive data (and possibly others as well).
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{Gettin' visual with it}
  \begin{overprint}
    \onslide<1>\centerline{\includegraphics[scale=0.3]{img/visual-basic.pdf}}%
    \onslide<2>\centerline{\includegraphics[scale=0.3]{img/visual-box.pdf}}%
    \onslide<3>\centerline{\includegraphics[scale=0.3]{img/visual-data.pdf}}%
    \onslide<4>\centerline{\includegraphics[scale=0.3]{img/visual-reference.pdf}}%
  \end{overprint}
  \vspace{1cm}
  \begin{overprint}
    \onslide<1>We will use drawings like this one to represent data in memory.
    \onslide<2>We represent machine words using boxes. Adjacent boxes are
    adjacent machine words in memory (these two are not).
    \onslide<3>Non-reference primitive data will be drawn inside the box representing its
    machine word.
    \onslide<4>References will be drawn as arrows to the data they `point' to
    (this one isn't pointing anywhere useful).
  \end{overprint}
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{The CONS cell}
  \centerline{\includegraphics[scale=0.3]{img/cons-cell.pdf}}\pause{}

  \medskip

  \begin{itemize}
    \item Consists of two adjacent machine words\pause{}
    \item A CONS cell's first word is called its {\em CAR\/} (pronounced 
      like `motor vehicle')\pause{}
    \item A CONS cell's second word is called its {\em CDR\/} (pronounced 
      like `COULD-ruh')\pause{}
    \item Either the CAR or the CDR can store reference, or primitive
      non-reference, data as needed\pause{}
    \item Can implement {\em every single one\/} of the data structures
      mentioned at the start of this talk!
  \end{itemize}
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{What you're probably thinking right now}
  \only<1>{\centerline{\includegraphics[scale=0.15]{img/doge.pdf}}}
  \only<2>{\centerline{\includegraphics[scale=0.14]{img/ducreux.pdf}}}
\end{frame}

\section{Building up other structures}

\begin{frame}[c]
  \frametitle{Lists}
  
  \begin{definition}
    {\em NIL\/} refers to a unique machine word representing `no useful data'.
  \end{definition}\pause{}

  \medskip

  Initially, we'll assume that lists only store primitive data. We'll later show
  how to overcome this.\pause{}

  \medskip

  The intuition here is based on the fact that a CONS cell looks a lot like a
  singly-linked list node.\pause{} More precisely, we can store each node's {\em
  data\/} in the CAR, and {\em `next' reference\/} in the CDR.\pause{} For
  the last node, we can have its `next' reference `point to' NIL to mark the 
  end of the list.\pause{}

  \medskip

  Using this approach, we can define all the usual list operations in a
  straightforward way.
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{Visualizing CONS cell lists}
  \begin{overprint}
    \onslide<1>\centerline{\includegraphics[scale=0.2]{img/list-basic.pdf}}%
    \onslide<2>\centerline{\includegraphics[scale=0.2]{img/list-conses.pdf}}%
    \onslide<3>\centerline{\includegraphics[scale=0.2]{img/list-cars.pdf}}%
    \onslide<4>\centerline{\includegraphics[scale=0.2]{img/list-cdrs.pdf}}%
  \end{overprint}
  \vspace{2cm}
  \onslide<1->This is the in-memory representation of the list {\tt [10, 20, 
  30]} using CONS cells. \onslide<2->Each CONS cell is one list node. 
  \onslide<3->CARs contain the data. \onslide<4->CDRs contain references to 
  the next list node, or NIL for the end.
\end{frame}

\begin{frame}[c]
  \frametitle{Lists with complex data}

  \begin{itemize}
    \item Most interesting data won't fit into a single machine word.\pause{}
    \item To have lists storing complex data, we have the CARS of our list node 
      CONS cells be {\em references\/} to the data elsewhere in memory.\pause{} 
    \item We can even mix-and-match the two!
  \end{itemize}
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{Visualizing lists with complex data}
  \begin{overprint}
    \onslide<1>\centerline{\includegraphics[scale=0.2]{img/list-complex.pdf}}%
    \onslide<2>\centerline{\includegraphics[scale=0.2]{img/list-complex-prim.pdf}}%
    \onslide<3>\centerline{\includegraphics[scale=0.2]{img/list-complex-comp.pdf}}%
  \end{overprint}
  \vspace{1cm}
  \onslide<1->This is the in-memory representation of the list {\tt ["My", 10,
  "sons"]} using CONS cells. \onslide<2->Primitive data is stored in the CAR of
  the relevant cell as before. \onslide<3->Complex data is stored elsewhere, and
  the CDRs of the relevant cells store references to that data instead.
\end{frame}

\begin{frame}[c]
  \frametitle{Dictionaries}\pause{}

  \begin{definition}
    A {\em dictionary\/} stores a set of key-value pairs, such that keys in the
    dictionary are unique.
  \end{definition}\pause{}
  
  \medskip

  We can represent a key-value pair by using a CONS cell where both the CAR and
  CDR store data (or a reference to data).\pause{} We can then create a list of
  these as complex data.\pause{} To ensure that we don't end up with key
  duplication, we do two things:\pause{}

  \medskip

  \begin{itemize}
    \item Ensure that queries always start from the first CONS cell in the 
      list\pause{}
    \item Put inserts at the front of the list (so they'll be found before any
      previous entries with the same key)
  \end{itemize}
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{Example of CONS cell-based dictionary}
  \begin{overprint}
    \onslide<1>\centerline{\includegraphics[scale=0.2]{img/dictionary-basic.pdf}}%
    \onslide<2>\centerline{\includegraphics[scale=0.2]{img/dictionary-spine.pdf}}%
  \end{overprint}
  \vspace{0.5cm}
  \onslide<1->This is the in-memory representation of the dictionary $\{(10, 
  \text{"foo"}), (\text{"bar"}, 20), (\text{"baz"},\text{"quux"})\}$. 
  \onslide<2->The entries form a list of complex data (the {\em spine\/} of 
  the dictionary).
\end{frame}

\begin{frame}[c]
  \frametitle{A slight intermission}

  Given that we've now described how to define dictionaries using CONS cells, we
  have the ability to define any other data structure (as explained in the
  previous talk).\pause{} However, we can implement some structures a bit better
  than this using CONS cells, including:\pause{}

  \medskip

  \begin{itemize}
    \item Matrices\pause{}
    \item Trees\pause{}
    \item Graphs\pause{}
  \end{itemize}

  \medskip

  To save time, we will only draw diagrams where
  useful.\pause{} If we refer to lists or dictionaries anywhere, assume we mean
  ones based on CONS cells as described previously.
\end{frame}

\begin{frame}[c]
  \frametitle{Matrices}

  \begin{definition}
    The {\em rank\/} of a matrix is the number of dimensions it has.
  \end{definition}\pause{}

  \medskip

  A rank $1$ `matrix' is just a list.\pause{} A rank
  $2$ matrix is a list of lists, a rank $3$ matrix is a list of rank $2$
  matrices, and so on, and so forth.\pause{} 

  \medskip

  We can define a rank $n$ matrix (for $n > 1$) as a spine of rank $n -
  1$ matrices.
\end{frame}

\begin{frame}[c]
  \frametitle{Matrix example}

  Consider the following rank $2$ matrix:\pause{}

  \[
    \begin{bmatrix}
      1 & 2 & 3 & 4\\
      5 & 6 & 7 & 8\\
      9 & 10 & 11 & 12\\
    \end{bmatrix}
  \]

  \medskip

  We can represent it in two ways.\pause{} We can have the spine go along the columns:
  
  \medskip

  {\tt [[1, 5, 9], [2, 6, 10], [3, 7, 11], [4, 8, 12]]}\pause{}

  \medskip

  \noindent{}or along the rows:

  \medskip

  {\tt [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]}
\end{frame}

\begin{frame}[c]
  \frametitle{Trees}

  We will begin with a very simple kind of tree: a {\em binary leaf
  tree}.\pause{} More precisely:

  \medskip

  \begin{definition}
    A {\em binary leaf tree\/} has data only in its leaf nodes, and its internal
    nodes have a maximum of two children.\pause{}
  \end{definition}

  \medskip

  We can represent a binary leaf tree using CONS cells as follows:\pause{}

  \medskip

  \begin{itemize}
    \item An internal node is represented by a CONS cell whose CAR stores a
      reference to its left child (or NIL if none) and whose CDR stores a
      reference to its right child (or NIL if none)\pause{}
    \item A leaf node is represented by a CONS cell storing the same data (or
      reference) in both its CAR and CDR
  \end{itemize}
\end{frame}

\begin{frame}[c, fragile]
  \frametitle{Example binary leaf tree and its CONS cell representation}
  \begin{columns}
    \column{.5\textwidth}
    \centerline{\includegraphics[scale=0.6]{img/leaf-tree.pdf}}%
    \column{.5\textwidth}
    \centerline{\includegraphics[scale=0.15]{img/leaf-tree-cons.pdf}}%
  \end{columns}
\end{frame}

\begin{frame}[c]
  \frametitle{Rose trees}

  If we need trees of arbitrary branching factor (number of children), or where
  data has to be stored in the leaves as well, we can instead use a {\em rose
  tree\/} representation:\pause{}

  \medskip

  \begin{itemize}
    \item A leaf node is represented the same way as for binary leaf
      trees\pause{}
    \item An internal node is represented by a CONS cell whose CAR stores the
      data for that node (or a reference to the data), and whose CDR stores a
      list of references to its children in left-to-right order\pause{}
  \end{itemize}

  \medskip

  We can also include a parent reference in the list of children (typically as
  the first element) if we want.
\end{frame}

\begin{frame}[c]
  \frametitle{Graphs}

  Typically, graphs are stored as an {\em adjacency matrix\/} or an {\em
  adjacency list}.\pause{} The adjacency matrix is easy to represent using a
  rank $2$ matrix as we defined before.\pause{} The adjacency list is a bit
  trickier:\pause{}

  \medskip

  \begin{itemize}
    \item An adjacency list is represented by an {\em adjacency spine\/}\pause{}
    \item An adjacency spine is made up of {\em vertex cells\/}\pause{}
    \item A vertex cell is a CONS cell whose CAR stores a reference to the next
      vertex cell in the adjacency spine, and whose CDR stores a {\em vertex
      list}\pause{}
    \item A vertex list is a CONS cell whose CAR stores the vertex's label (or a
      reference to it), and whose CDR stores a reference to a {\em child
      list\/}\pause{}
    \item A child list's data are references to elements of the adjacency spine,
      corresponding to the neighbours of that vertex
  \end{itemize}

\end{frame}

\section{Limitations and uses}

\begin{frame}[c]
  \frametitle{The bad news}
  All of these are really neat, but unfortunately, these implementations can be
  slow, especially when we have a lot of data, because:\pause{}

  \medskip
  
  \begin{itemize}
    \item Many operations involve scanning `chains' of CONS cells, which give us
      linear (or worse!) time complexity\pause{}
    \item CONS cells are not cache-friendly; typically, we'll only cache the
      cell itself, and {\em not\/} any data its CAR or CDR might hold references
      to\pause{}
    \item This means that we could potentially be getting cache misses {\em
      every time\/} we follow a stored reference!\pause{}
    \item There are ways to make CONS cell-based structures more cache-friendly,
      but they're usually more trouble than they're worth
  \end{itemize}
\end{frame}

\begin{frame}[c]
  \frametitle{So why use them?}

  \begin{itemize}
    \item When we initially set out to solve a problem, we don't often know what
      operations we need, how much data we'll have, or which operations need to
      be fast\pause{}
    \item CONS cell-based structures can be built and {\em extended\/}
      quickly\pause{}
    \item This makes them {\em ideal\/} for prototyping\pause{}
    \item There is no {\em fast\/} --- only fast {\em enough\/}; you might find CONS cells are
      good enough for your purposes!\pause{}
    \item Once you know exactly what you need, you can always replace some (or
      all) of the structure with something more efficient\pause{}
  \end{itemize}

  \medskip

  In short, CONS cell-based structures can help you {\em learn your tradeoffs\/}
  quickly and easily.
\end{frame}

\section{Questions}

\begin{frame}[fragile, c]
  \frametitle{Questions?}
  \begin{center}
  \includegraphics[scale=0.27]{img/questions.pdf}
\end{center}
\end{frame}

\end{document}