Commit 0b9ff49a authored by Laros's avatar Laros
Browse files

Added figures, general layout of lecture done.

parent 43053339
# Benchmark figure: run time against dataset size for the two data files
# "old.dat" (titled "Original") and "new.dat" (titled "Optimised").

# Axis annotation.
set xlabel "dataset size"
set ylabel "run time"
# NOTE(review): the documented form is `set xtics <start>, <incr>`; confirm
# that this comma-less spelling produces the intended tics.
set xtics 0 200

# Line style 1 is blue (#0060ad), line style 2 is red (#dd181f); both are
# drawn with thick lines and large points.
set style line 1 linecolor rgb '#0060ad' linetype 1 linewidth 10 pointtype 7 pointsize 4
set style line 2 linecolor rgb '#dd181f' linetype 1 linewidth 10 pointtype 5 pointsize 4

plot "old.dat" with linespoints linestyle 1 title "Original", \
     "new.dat" with linespoints linestyle 2 title "Optimised"
# Complexity figure: four analytic curves (constant, linear, quadratic and
# exponential) drawn in one plot to compare how fast they grow.

# Visible window and axis annotation.
set xrange [0:17]
set yrange [0:20]
set xlabel "dataset size"
set ylabel "time"

# One thick line style per curve; only the colour index differs.
set style line 1 linecolor 1 linetype 1 linewidth 10
set style line 2 linecolor 3 linetype 1 linewidth 10
set style line 3 linecolor 4 linetype 1 linewidth 10
set style line 4 linecolor 7 linetype 1 linewidth 10

plot 3 title "Constant" linestyle 1, \
     0.8 * x title "Linear" linestyle 2, \
     0.3 * x ** 2 title "Quadratic" linestyle 3, \
     5 * 2**x - 5 title "Exponential" linestyle 4
1 0.21
2 0.23
3 0.23
4 0.26
5 0.25
10 0.27
20 0.33
30 0.38
40 0.43
100 0.76
200 1.28
300 1.84
400 2.36
1000 5.59
1 0.75
2 0.96
3 1.14
4 1.39
5 1.56
10 2.62
20 4.74
30 7.55
40 8.86
100 21.70
200 41.98
300 63.23
400 85.43
1000 216.20
\documentclass[slidestop]{beamer}
\usepackage{epigraph}
\setlength{\epigraphwidth}{0.65\textwidth}
\renewcommand{\epigraphsize}{\footnotesize}
\renewcommand{\textflush}{center}
\title{Code optimisation}
\providecommand{\myConference}{Work discussion}
\providecommand{\myDate}{Wednesday, 24 February 2011}
......@@ -29,46 +34,260 @@
% Make the title page.
\bodytemplate
%
% Easy way: running on a cluster / grid.
% - Linear speedup.
% - Expensive (cost per node).
% - Takes quite some time.
% Complexity.
% - Examples of linear / quadratic algorithms.
% Find critical section(s).
% - 99% of the time you spend in 1% of the code.
% - Profilers.
% First page of the presentation.
\section{Introduction}
\subsection{Before we start}
\begin{pframe}
Readability comes first:
\begin{itemize}
\item First write \emph{readable} and \emph{maintainable} code.
\item Only optimise when needed.
\end{itemize}
\vfill
\epigraph{Premature optimization is the root of all evil.}{Donald Knuth}
\end{pframe}
\subsection{Popular view}
\begin{pframe}
Use more computers, buy more hardware.
\bigskip
But consider this:
\begin{itemize}
\item Only basic speedup.
\item Adds a lot of complexity to your program.
\item Costly.
\end{itemize}
\bigskip
Related opinion: ``Don't worry about complexity, everything will be solved
once computers become faster.''
\end{pframe}
\section{Complexity theory}
\subsection{Theory}
\begin{pframe}
Study of the \emph{time} and \emph{memory} complexity of algorithms.
\bigskip
\begin{table}[]
\begin{center}
\begin{tabular}{ll}
Correlation & Complexity\\
\hline
Constant & $\mathcal{O}(1)$\\
Linear & $\mathcal{O}(n)$\\
Quadratic & $\mathcal{O}(n^2)$\\
Exponential & $\mathcal{O}(2^n)$
\end{tabular}
\end{center}
\caption{Examples of different complexities.}
\end{table}
What is the correlation between the size of the input ($n$) and the memory or
CPU usage?
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\colorbox{white}{
\includegraphics[height=0.8\textheight]{complexity}
}
\end{center}
\caption{Run time as a function of dataset size for different complexities.}
\end{figure}
\end{pframe}
\subsection{Examples}
\begin{pframe}
Constant complexity ($\mathcal{O}(1)$).
\bigskip
\begin{lstlisting}[language=python, caption={Get the first letter of a
word.}]
def first_letter(word):
    """Return the first character of `word`.

    A single index operation: the cost does not depend on len(word).
    Raises IndexError when `word` is empty.
    """
    return word[0]
\end{lstlisting}
No dependence on the length of the word.
\end{pframe}
\begin{pframe}
Linear complexity ($\mathcal{O}(n)$).
\bigskip
\begin{lstlisting}[language=python, caption={Find the maximum value in a
list.}]
def maximum_value(list_of_values):
    """Return the largest value in `list_of_values` (0 for an empty list).

    Every element is inspected exactly once, so the run time grows linearly
    with the length of the list.
    """
    # Seed with the first element rather than 0; seeding with 0 would give
    # the wrong answer for a list that contains only negative values.
    maximum = list_of_values[0] if list_of_values else 0
    for value in list_of_values:
        if value > maximum:
            maximum = value
    return maximum
\end{lstlisting}
If the list is twice as long, this routine will take twice as much time.
\end{pframe}
\begin{pframe}
Quadratic complexity ($\mathcal{O}(n^2)$).
\bigskip
\begin{lstlisting}[language=python, caption={Find the maximum sum of two
values in a list.}]
def maximum_pair(list_of_values):
    """Return the largest sum of two distinct elements of `list_of_values`.

    Every pair is tried (deliberately: this is the quadratic example), so
    the run time grows with the square of the length of the list.
    Returns 0 when the list holds fewer than two values.
    """
    if len(list_of_values) < 2:
        return 0  # There is no pair to add up.
    # Seed with a real pair rather than 0; seeding with 0 would give the
    # wrong answer when every pair has a negative sum.
    maximum = list_of_values[0] + list_of_values[1]
    for index, value_1 in enumerate(list_of_values):
        # Only look at later elements: each pair is visited once.
        for value_2 in list_of_values[index + 1:]:
            if value_1 + value_2 > maximum:
                maximum = value_1 + value_2
    return maximum
\end{lstlisting}
The amount of time grows faster than the length of the input.
\end{pframe}
\begin{pframe}
\begin{table}[]
\begin{center}
\begin{tabular}{ll}
Algorithm & Complexity\\
\hline
Intersecting sorted regions & $\mathcal{O}(n)$\\
Sorting & $\mathcal{O}(n \log n)$\\
Pairwise alignment & $\mathcal{O}(n^2)$\\
\textit{De novo} assembly & $\mathcal{O}(2^n)$\\
\end{tabular}
\end{center}
\caption{Known complexities.}
\end{table}
\end{pframe}
\section{Bottlenecks}
\subsection{The 90/10 law}
\begin{pframe}
The $90/10$ law: \textit{$90\%$ of the execution time of a computer program
is spent executing $10\%$ of the code.}
\bigskip
Finding \emph{bottlenecks} in your code (focus on the $10\%$).
\end{pframe}
\subsection{Profilers}
\begin{pframe}
Use a \emph{profiler} to see which part takes up the most time.
\bigskip
\begin{lstlisting}[language=none, caption={Profiling output.}]
ncalls tottime cumtime filename:line(function)
1 0.002 35.276 tssv:3(<module>)
1 0.000 34.776 tssv.py:437(main)
1 0.058 34.767 tssv.py:336(tssv)
7272 0.185 34.599 tssv.py:121(alignPair)
14544 0.100 34.408 sg_align.py:71(align)
14544 25.350 33.433 sg_align.py:22(_align)
11092934 7.192 7.192 {min}
895601 0.958 0.958 {range}
\end{lstlisting}
The culprit in this example is the function ``\lstinline{_align}''.
\end{pframe}
\section{Complexity of bottlenecks}
% Investigate complexity.
% - Example of quadratic algorithm that has linear counterpart.
% - This gives more speedup than parallelisation.
% - Very hard, even for experienced programmers, do not hesitate to ask.
% Optimisation using other languages (C).
% - Linear speedup.
\begin{pframe}
Overlap of regions.
\bigskip
\end{pframe}
\section{Combining languages}
\subsection{Compiled languages}
\begin{pframe}
Rewrite bottleneck in a \emph{compiled language}.
\begin{itemize}
\item Still readable.
\item Linear speedup.
\item Requires compilation of your code.
\end{itemize}
\vfill
\epigraph{
When I find my code in tons of trouble,\\
Friends and colleagues come to me,\\
Speaking words of wisdom:\\
``Write in C.''
}{Anonymous}
\end{pframe}
\subsection{Example: TSSV}
% - Example TSSV (one day of work saves 2 nodes at 20,000 euro each).
% Parallelisation (last resort).
%
\begin{pframe}
\begin{lstlisting}[language=python, caption={Python version.}]
def _align(matrix, xSize, ySize, seq1, seq2):
for x in range(1, xSize):
for y in range(1, ySize):
matrix[x][y] = min(
matrix[x -1][y] + 1,
matrix[x][y - 1] + 1,
matrix[x - 1][y - 1] +
int(seq1[x - 1] != seq2[y - 1]))
\end{lstlisting}
\end{pframe}
% First page of the presentation.
\section{Introduction}
\subsection{Using other languages}
\begin{pframe}
\begin{center}
\begin{minipage}{0.63\textwidth}
\begin{center}
\textit{
When I find my code in tons of trouble,\\
Friends and colleagues come to me,\\
Speaking words of wisdom:\\
``Write in C.''
}
\hfill --- Anonymous
\end{center}
\end{minipage}
\end{center}
\begin{lstlisting}[language=C, caption={C version.}]
/*
 * Fill `matrix` in place with alignment costs for `seq1` and `seq2`.
 *
 * Cells [x][y] with 1 <= x < x_size and 1 <= y < y_size are overwritten.
 * Row 0 and column 0 are only read, so the caller must initialise them.
 */
void _align(int **matrix, int x_size, int y_size,
            char *seq1, char *seq2) {
  int x,
      y;

  for (x = 1; x < x_size; x++)
    for (y = 1; y < y_size; y++)
      /* Cheapest of: step from the previous row, step from the previous
       * column, or a diagonal step that costs 1 on a mismatch. */
      matrix[x][y] = _min(
        _min(
          matrix[x - 1][y] + 1,
          matrix[x][y - 1] + 1),
        matrix[x - 1][y - 1] +
          (int)(seq1[x - 1] != seq2[y - 1]));
}
\end{lstlisting}
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\colorbox{white}{
\includegraphics[height=0.8\textheight]{benchmark}
}
\end{center}
\caption{Run times for original and optimised versions.}
\end{figure}
\end{pframe}
\begin{pframe}
\begin{figure}[]
\begin{center}
\colorbox{white}{
\includegraphics[height=0.8\textheight]{speed}
}
\end{center}
\caption{Speedup.}
\end{figure}
\end{pframe}
\section{Parallelisation}
% Parallelisation (last resort).
%
\section{Questions?}
\lastpagetemplate
\begin{pframe}
......
1 3.571
2 4.173
3 4.956
4 5.346
5 6.240
10 9.703
20 14.363
30 19.868
40 20.604
100 28.552
200 32.796
300 34.364
400 36.199
1000 38.676
# Speedup figure: the contents of "speed.dat" drawn as a single curve
# without a legend entry.

# Axis annotation.
set xlabel "dataset size"
set ylabel "speedup"
# NOTE(review): the documented form is `set xtics <start>, <incr>`; confirm
# that this comma-less spelling produces the intended tics.
set xtics 0 200

# Line style 1 is blue (#0060ad), drawn with a thick line and large points.
set style line 1 linecolor rgb '#0060ad' linetype 1 linewidth 10 pointtype 7 pointsize 4

plot "speed.dat" with linespoints linestyle 1 notitle
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment