From d4858a439f2423ae5a9735f7a9a6bf709392e625 Mon Sep 17 00:00:00 2001 From: Bent Bisballe Nyeng Date: Tue, 1 Aug 2023 16:24:07 +0200 Subject: A4: WIP --- Makefile | 6 ++- a4/exercise.tex | 163 +++++++++++++++++++++++++++++++++++++++++++++++++------- preamble.tex | 5 ++ 3 files changed, 153 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index c343062..cb17956 100644 --- a/Makefile +++ b/Makefile @@ -22,13 +22,15 @@ A3: xelatex -halt-on-error -jobname=${PRE}$@ a3/exercise.tex rm -f ${PRE}$@.aux ${PRE}$@.log -A4: +%.pdf: %.m + octave $< + +A4: a4/insert.pdf a4/insert-padded.pdf a4/remove.pdf a4/remove-padded.pdf zip ${PRE}$@.zip a4/Makefile zip ${PRE}$@.zip a4/*.cc zip ${PRE}$@.zip a4/*.h zip ${PRE}$@.zip a4/*.log zip ${PRE}$@.zip a4/*.m - (cd a4; for m in *.m; do octave $${m}; done) xelatex -halt-on-error -jobname=${PRE}$@ a4/exercise.tex xelatex -halt-on-error -jobname=${PRE}$@ a4/exercise.tex rm -f ${PRE}$@.aux ${PRE}$@.log diff --git a/a4/exercise.tex b/a4/exercise.tex index af53ea5..6ddb081 100644 --- a/a4/exercise.tex +++ b/a4/exercise.tex @@ -24,6 +24,11 @@ suit my needs for this assignment. \bigskip +The main code for exercise is in the file +called \texttt{list\_vs\_vector.cc} + +\bigskip + The random number generation for use in insertion I used the co-routine as described above, inserting, while checking if the number already exists. @@ -36,32 +41,152 @@ Both random number generators were seeded with the iterator number iterations use unique sets of numbers. For the smaller sizes of \texttt{N} (the number of items to be -inserted/removed) i print out the numbers to verify their correctness. +inserted/removed) i print out the numbers to verify their +correctness. These numbers can be seen in the log files in the zip +file. +The program has been compiled and executed with both a built-in int as +the value type of the three containers and a struct padding a single +int with chars up to 1k. The log files for the executions are +named \texttt{console.log} and \texttt{console-padding.log} +respectively. +For the insertion and removal two templated functions were written: +\begin{lstlisting}[language=C++] +template +void sorted_insert(C& container, T element) +{ + auto it = std::begin(container); + while(it != std::end(container) && *it < element) + { + ++it; + } -\begin{figure} - \includegraphics[scale=0.95]{a4/insert.pdf} - \caption{Insert of int} - \label{insert} -\end{figure} + container.insert(it, element); +} -\begin{figure} - \includegraphics[scale=.95]{a4/remove.pdf} - \caption{Removal of int} - \label{remove} -\end{figure} +template +void remove_index(C& container, int index) +{ + auto it = std::begin(container); + for(int i = 0; i < index; ++i) + { + ++it; + } + + container.erase(it); +} +\end{lstlisting} + +With the exception of set insertion, these two functions are used +for all the timed container operations, ie. no clever stuff should be +going on in the index/iterator lookups. + +\bigskip + +In \textit{figure \ref{insert}} the insertion times for \texttt{int}s in the +three container types \texttt{std::vector}, \texttt{std::list} +and \texttt{std::set} can be seen. +I was expecting the \texttt{std::vector} to outperform the othr two +because of data locality, and this is indeed the case. I was however +surprised how much it outperformed them, but this might be because the +compiler could see through my ``slow'' iterator looping and convert +that into the more optimized \texttt{vec.begin() + n} random access +iterator lookup. +At the same time the bad performance of \texttt{std::list} puzzled +me. But it might be because the list has to allocate each of its +elements on the freestore, which the \texttt{std::set} also has to +do. So I'm not really sure what is going on there. + +Similarly in \textit{figure \ref{remove}} the three containers are +compared when removing the \texttt{int}s again by randomized index. +Again I was expecting the \texttt{std::vector} to outperform the other +two, but not as much as it did. +I was however expecting the \texttt{std::set} to be slightly slower +than the \texttt{std::list} because of the extra needed work for +rebalancing the tree. -\begin{figure} - \includegraphics[scale=0.95]{a4/insert-padded.pdf} - \caption{Insert of padded-int (1024 bytes pr. value)} - \label{insert-padded} +\begin{figure}[!tbp] + \makebox[\textwidth][c]{% + \subfloat[Insert]{\scalebox{1.7}{\includegraphics[scale=0.4]{a4/insert.pdf}}\label{insert}} + \quad + \subfloat[Removal]{\scalebox{1.7}{\includegraphics[scale=0.4]{a4/remove.pdf}}\label{remove}} + } +\caption{Plot of operations on the three container types using regular \texttt{int}s} \end{figure} -\begin{figure} - \includegraphics[scale=.95]{a4/remove-padded.pdf} - \caption{Removal of padded-int (1024 bytes pr. value)} - \label{remove-padded} +\bigskip + +To see how much impact the data locality has on the performance of the +three container types, the experiments are repeated with a much bigger +value type. +I decided to create a \texttt{struct}, \texttt{PaddedInt}, with the +needed member functions for it to be storable inside the three containers: +\begin{lstlisting}[language=C++] +struct PaddedInt +{ + PaddedInt(int i) : i(i) {} + int operator=(int other) + { + i = other; + return i; + } + + bool operator<(int other) const + { + return i < other; + } + + bool operator<(const PaddedInt& other) const + { + return i < other.i; + } + + int i; + char padding[1024 - sizeof(int)]{}; // pad up to 1k +}; +\end{lstlisting} +It add padding to each stored item is 1024 bytes. + +\begin{figure}[!tbp] + \makebox[\textwidth][c]{% + \subfloat[Insert of padded int]{\scalebox{1.7}{\includegraphics[scale=0.4]{a4/insert-padded.pdf}}\label{insert-padded}} + \quad + \subfloat[Removal of padding int]{\scalebox{1.7}{\includegraphics[scale=0.4]{a4/remove-padded.pdf}}\label{remove-padded}} + } +\caption{Plot of operations on the three container types using structure with and \texttt{int} and padding up to 1kb} \end{figure} +With this configuration a decrease in the performance of +the \texttt{std::vector} is expected as opposed to +the \texttt{std::list} and \texttt{std::set} which only carry pointers +to their items. + +\bigskip + +Looking at \textit{figure \ref{insert-padded}} the performance of the +insertion indeed seem to have become a lot worse for +the \texttt{std::vector}. Looking at the absolute performance of +the \texttt{std::list} and comparing it +to \textit{figure \ref{insert}} it has become $~10\%$ +slower. \texttt{std::set} on the other hand seem to be largely +unaffected by the increase in value type. This is likely because of +its $O(log(n))$ lookup times, which means that it only have to do a +fraction of the number of fetches from the free-store compared to the +other two. + +Removal of the padded elements from the \texttt{std::list} +and \texttt{std::set} are expected to be unaffected by the increase if +value type size, since they both just store pointers, and because the +iteations used to find the index is not needing data from the value +type. +On the other hand, \texttt{std::vector} will have to do a lot copying +of data (the entire tail will have to be copied one index to the +left), when erasing item in the middle, which amounts top a lot of +data. +A can be seen in \textit{figure \ref{remove-padded}}, as expected, the +performces of \texttt{std::list} and \texttt{std::set} are close to +the \texttt{int} counterparts. And, also as expected, +the \texttt{std::vector} is really being slowed down. + \end{document} diff --git a/preamble.tex b/preamble.tex index 94ef2ba..30d475a 100644 --- a/preamble.tex +++ b/preamble.tex @@ -1,6 +1,11 @@ \documentclass[11pt]{article} \usepackage{graphicx} +%\usepackage{xcolor} \usepackage{fancyhdr} +\usepackage{listings} +\usepackage{subfig} +%\renewcommand{\thesubfigure}{Figure \arabic{subfigure}} +\captionsetup[subfigure]{labelformat=simple, labelsep=colon} \pagestyle{fancy} \author{Bent Bisballe Nyeng University of Aarhus} \begin{document} -- cgit v1.2.3