diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb index 31cfbf3bdd2334d8da92e5383b75f8b1b068d816..84acfe85e9c08aa18d603ca0cd8a7da37305afed 100644 --- a/pandas/pandas.ipynb +++ b/pandas/pandas.ipynb @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 471, "metadata": { "collapsed": true }, @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 472, "metadata": {}, "outputs": [ { @@ -104,7 +104,7 @@ "u'0.20.1'" ] }, - "execution_count": 97, + "execution_count": 472, "metadata": {}, "output_type": "execute_result" } @@ -129,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 473, "metadata": {}, "outputs": [ { @@ -144,7 +144,7 @@ "dtype: float64" ] }, - "execution_count": 98, + "execution_count": 473, "metadata": {}, "output_type": "execute_result" } @@ -164,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 474, "metadata": {}, "outputs": [ { @@ -175,7 +175,7 @@ " dtype='datetime64[ns]', freq='D')" ] }, - "execution_count": 99, + "execution_count": 474, "metadata": {}, "output_type": "execute_result" } @@ -188,21 +188,21 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 475, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[ 0.23051414, -0.28580632, 0.35457973, 1.63813223],\n", - " [ 1.46610552, 0.36143299, -0.69967186, 0.5909181 ],\n", - " [ 0.85840437, -0.20824091, -0.32334378, -0.26983698],\n", - " [ 0.26830511, -0.78065576, -0.22962856, 0.77808124],\n", - " [ 1.83936044, 1.14887577, 1.2779539 , 0.70010126],\n", - " [-1.19054289, 0.91047078, -0.0720562 , -0.41625165]])" + "array([[-0.19285917, 0.32914174, 1.05612678, 0.90109364],\n", + " [-0.52196454, -0.77643073, -0.05150983, 0.87993839],\n", + " [ 0.82601868, -0.44544422, 2.14961986, 1.79456143],\n", + " [ 0.37800763, -0.55051725, 1.70024071, 0.85985274],\n", + " [-0.35328306, 2.52404431, -0.73611908, -0.32295207],\n", + " [ 
0.68458742, -0.49398027, 0.17228974, -0.11312861]])" ] }, - "execution_count": 100, + "execution_count": 475, "metadata": {}, "output_type": "execute_result" } @@ -215,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 476, "metadata": {}, "outputs": [], "source": [ @@ -224,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 477, "metadata": {}, "outputs": [ { @@ -257,45 +257,45 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>0.230514</td>\n", - " <td>-0.285806</td>\n", - " <td>0.354580</td>\n", - " <td>1.638132</td>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", - " <td>0.700101</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", - " <td>-1.190543</td>\n", - " <td>0.910471</td>\n", - " <td>-0.072056</td>\n", - " <td>-0.416252</td>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", " </tr>\n", " </tbody>\n", 
"</table>\n", @@ -303,15 +303,15 @@ ], "text/plain": [ " A B D C\n", - "0 0.230514 -0.285806 0.354580 1.638132\n", - "1 1.466106 0.361433 -0.699672 0.590918\n", - "2 0.858404 -0.208241 -0.323344 -0.269837\n", - "3 0.268305 -0.780656 -0.229629 0.778081\n", - "4 1.839360 1.148876 1.277954 0.700101\n", - "5 -1.190543 0.910471 -0.072056 -0.416252" + "0 -0.192859 0.329142 1.056127 0.901094\n", + "1 -0.521965 -0.776431 -0.051510 0.879938\n", + "2 0.826019 -0.445444 2.149620 1.794561\n", + "3 0.378008 -0.550517 1.700241 0.859853\n", + "4 -0.353283 2.524044 -0.736119 -0.322952\n", + "5 0.684587 -0.493980 0.172290 -0.113129" ] }, - "execution_count": 102, + "execution_count": 477, "metadata": {}, "output_type": "execute_result" } @@ -322,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 478, "metadata": {}, "outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 479, "metadata": {}, "outputs": [ { @@ -364,45 +364,45 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-01</th>\n", - " <td>0.230514</td>\n", - " <td>-0.285806</td>\n", - " <td>0.354580</td>\n", - " <td>1.638132</td>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <td>0.378008</td>\n", + " 
<td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", - " <td>0.700101</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-06</th>\n", - " <td>-1.190543</td>\n", - " <td>0.910471</td>\n", - " <td>-0.072056</td>\n", - " <td>-0.416252</td>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -410,15 +410,15 @@ ], "text/plain": [ " A B D C\n", - "2017-09-01 0.230514 -0.285806 0.354580 1.638132\n", - "2017-09-02 1.466106 0.361433 -0.699672 0.590918\n", - "2017-09-03 0.858404 -0.208241 -0.323344 -0.269837\n", - "2017-09-04 0.268305 -0.780656 -0.229629 0.778081\n", - "2017-09-05 1.839360 1.148876 1.277954 0.700101\n", - "2017-09-06 -1.190543 0.910471 -0.072056 -0.416252" + "2017-09-01 -0.192859 0.329142 1.056127 0.901094\n", + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129" ] }, - "execution_count": 104, + "execution_count": 479, "metadata": {}, "output_type": "execute_result" } @@ -429,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 480, "metadata": {}, "outputs": [ { @@ -510,7 +510,7 @@ "3 1.0 2017-09-20 1.0 3 EMC researcher" ] }, - "execution_count": 105, + "execution_count": 480, "metadata": {}, "output_type": "execute_result" } @@ -529,7 +529,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 481, "metadata": {}, "outputs": [ { @@ -544,7 +544,7 @@ "dtype: object" ] }, - "execution_count": 106, + "execution_count": 481, "metadata": {}, 
"output_type": "execute_result" } @@ -562,7 +562,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 482, "metadata": { "collapsed": true }, @@ -587,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 483, "metadata": {}, "outputs": [ { @@ -620,38 +620,38 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-01</th>\n", - " <td>0.230514</td>\n", - " <td>-0.285806</td>\n", - " <td>0.354580</td>\n", - " <td>1.638132</td>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", - " <td>0.700101</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -659,14 +659,14 @@ ], "text/plain": [ " A B D C\n", - "2017-09-01 0.230514 -0.285806 0.354580 1.638132\n", - "2017-09-02 1.466106 0.361433 -0.699672 0.590918\n", - "2017-09-03 0.858404 -0.208241 -0.323344 -0.269837\n", - "2017-09-04 0.268305 -0.780656 -0.229629 0.778081\n", - "2017-09-05 1.839360 1.148876 1.277954 0.700101" + 
"2017-09-01 -0.192859 0.329142 1.056127 0.901094\n", + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952" ] }, - "execution_count": 108, + "execution_count": 483, "metadata": {}, "output_type": "execute_result" } @@ -677,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 484, "metadata": {}, "outputs": [ { @@ -710,17 +710,17 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", - " <td>0.700101</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-06</th>\n", - " <td>-1.190543</td>\n", - " <td>0.910471</td>\n", - " <td>-0.072056</td>\n", - " <td>-0.416252</td>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -728,11 +728,11 @@ ], "text/plain": [ " A B D C\n", - "2017-09-05 1.839360 1.148876 1.277954 0.700101\n", - "2017-09-06 -1.190543 0.910471 -0.072056 -0.416252" + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129" ] }, - "execution_count": 109, + "execution_count": 484, "metadata": {}, "output_type": "execute_result" } @@ -750,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 485, "metadata": {}, "outputs": [ { @@ -761,7 +761,7 @@ " dtype='datetime64[ns]', freq='D')" ] }, - "execution_count": 110, + "execution_count": 485, "metadata": {}, "output_type": "execute_result" } @@ -772,7 +772,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 486, "metadata": {}, "outputs": [ { @@ -781,7 +781,7 @@ "Index([u'A', u'B', u'D', u'C'], dtype='object')" ] }, - "execution_count": 111, 
+ "execution_count": 486, "metadata": {}, "output_type": "execute_result" } @@ -792,21 +792,21 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 487, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[ 0.23051414, -0.28580632, 0.35457973, 1.63813223],\n", - " [ 1.46610552, 0.36143299, -0.69967186, 0.5909181 ],\n", - " [ 0.85840437, -0.20824091, -0.32334378, -0.26983698],\n", - " [ 0.26830511, -0.78065576, -0.22962856, 0.77808124],\n", - " [ 1.83936044, 1.14887577, 1.2779539 , 0.70010126],\n", - " [-1.19054289, 0.91047078, -0.0720562 , -0.41625165]])" + "array([[-0.19285917, 0.32914174, 1.05612678, 0.90109364],\n", + " [-0.52196454, -0.77643073, -0.05150983, 0.87993839],\n", + " [ 0.82601868, -0.44544422, 2.14961986, 1.79456143],\n", + " [ 0.37800763, -0.55051725, 1.70024071, 0.85985274],\n", + " [-0.35328306, 2.52404431, -0.73611908, -0.32295207],\n", + " [ 0.68458742, -0.49398027, 0.17228974, -0.11312861]])" ] }, - "execution_count": 112, + "execution_count": 487, "metadata": {}, "output_type": "execute_result" } @@ -824,7 +824,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 488, "metadata": {}, "outputs": [ { @@ -864,52 +864,52 @@ " </tr>\n", " <tr>\n", " <th>mean</th>\n", - " <td>0.578691</td>\n", - " <td>0.191013</td>\n", - " <td>0.051306</td>\n", - " <td>0.503524</td>\n", + " <td>0.136751</td>\n", + " <td>0.097802</td>\n", + " <td>0.715108</td>\n", + " <td>0.666561</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", - " <td>1.077292</td>\n", - " <td>0.747672</td>\n", - " <td>0.692115</td>\n", - " <td>0.755390</td>\n", + " <td>0.568525</td>\n", + " <td>1.246603</td>\n", + " <td>1.107521</td>\n", + " <td>0.774252</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", - " <td>-1.190543</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.699672</td>\n", - " <td>-0.416252</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " 
<tr>\n", " <th>25%</th>\n", - " <td>0.239962</td>\n", - " <td>-0.266415</td>\n", - " <td>-0.299915</td>\n", - " <td>-0.054648</td>\n", + " <td>-0.313177</td>\n", + " <td>-0.536383</td>\n", + " <td>0.004440</td>\n", + " <td>0.130117</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", - " <td>0.563355</td>\n", - " <td>0.076596</td>\n", - " <td>-0.150842</td>\n", - " <td>0.645510</td>\n", + " <td>0.092574</td>\n", + " <td>-0.469712</td>\n", + " <td>0.614208</td>\n", + " <td>0.869896</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", - " <td>1.314180</td>\n", - " <td>0.773211</td>\n", - " <td>0.247921</td>\n", - " <td>0.758586</td>\n", + " <td>0.607942</td>\n", + " <td>0.135495</td>\n", + " <td>1.539212</td>\n", + " <td>0.895805</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", - " <td>1.638132</td>\n", + " <td>0.826019</td>\n", + " <td>2.524044</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -918,16 +918,16 @@ "text/plain": [ " A B D C\n", "count 6.000000 6.000000 6.000000 6.000000\n", - "mean 0.578691 0.191013 0.051306 0.503524\n", - "std 1.077292 0.747672 0.692115 0.755390\n", - "min -1.190543 -0.780656 -0.699672 -0.416252\n", - "25% 0.239962 -0.266415 -0.299915 -0.054648\n", - "50% 0.563355 0.076596 -0.150842 0.645510\n", - "75% 1.314180 0.773211 0.247921 0.758586\n", - "max 1.839360 1.148876 1.277954 1.638132" + "mean 0.136751 0.097802 0.715108 0.666561\n", + "std 0.568525 1.246603 1.107521 0.774252\n", + "min -0.521965 -0.776431 -0.736119 -0.322952\n", + "25% -0.313177 -0.536383 0.004440 0.130117\n", + "50% 0.092574 -0.469712 0.614208 0.869896\n", + "75% 0.607942 0.135495 1.539212 0.895805\n", + "max 0.826019 2.524044 2.149620 1.794561" ] }, - "execution_count": 113, + "execution_count": 488, "metadata": {}, "output_type": "execute_result" } @@ -945,7 +945,7 @@ }, { "cell_type": "code", - "execution_count": 114, + 
"execution_count": 489, "metadata": {}, "outputs": [ { @@ -980,39 +980,39 @@ " <tbody>\n", " <tr>\n", " <th>A</th>\n", - " <td>0.230514</td>\n", - " <td>1.466106</td>\n", - " <td>0.858404</td>\n", - " <td>0.268305</td>\n", - " <td>1.839360</td>\n", - " <td>-1.190543</td>\n", + " <td>-0.192859</td>\n", + " <td>-0.521965</td>\n", + " <td>0.826019</td>\n", + " <td>0.378008</td>\n", + " <td>-0.353283</td>\n", + " <td>0.684587</td>\n", " </tr>\n", " <tr>\n", " <th>B</th>\n", - " <td>-0.285806</td>\n", - " <td>0.361433</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.780656</td>\n", - " <td>1.148876</td>\n", - " <td>0.910471</td>\n", + " <td>0.329142</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.445444</td>\n", + " <td>-0.550517</td>\n", + " <td>2.524044</td>\n", + " <td>-0.493980</td>\n", " </tr>\n", " <tr>\n", " <th>D</th>\n", - " <td>0.354580</td>\n", - " <td>-0.699672</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.229629</td>\n", - " <td>1.277954</td>\n", - " <td>-0.072056</td>\n", + " <td>1.056127</td>\n", + " <td>-0.051510</td>\n", + " <td>2.149620</td>\n", + " <td>1.700241</td>\n", + " <td>-0.736119</td>\n", + " <td>0.172290</td>\n", " </tr>\n", " <tr>\n", " <th>C</th>\n", - " <td>1.638132</td>\n", - " <td>0.590918</td>\n", - " <td>-0.269837</td>\n", - " <td>0.778081</td>\n", - " <td>0.700101</td>\n", - " <td>-0.416252</td>\n", + " <td>0.901094</td>\n", + " <td>0.879938</td>\n", + " <td>1.794561</td>\n", + " <td>0.859853</td>\n", + " <td>-0.322952</td>\n", + " <td>-0.113129</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1020,13 +1020,13 @@ ], "text/plain": [ " 2017-09-01 2017-09-02 2017-09-03 2017-09-04 2017-09-05 2017-09-06\n", - "A 0.230514 1.466106 0.858404 0.268305 1.839360 -1.190543\n", - "B -0.285806 0.361433 -0.208241 -0.780656 1.148876 0.910471\n", - "D 0.354580 -0.699672 -0.323344 -0.229629 1.277954 -0.072056\n", - "C 1.638132 0.590918 -0.269837 0.778081 0.700101 -0.416252" + "A -0.192859 -0.521965 0.826019 0.378008 -0.353283 0.684587\n", + "B 
0.329142 -0.776431 -0.445444 -0.550517 2.524044 -0.493980\n", + "D 1.056127 -0.051510 2.149620 1.700241 -0.736119 0.172290\n", + "C 0.901094 0.879938 1.794561 0.859853 -0.322952 -0.113129" ] }, - "execution_count": 114, + "execution_count": 489, "metadata": {}, "output_type": "execute_result" } @@ -1044,7 +1044,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 490, "metadata": {}, "outputs": [ { @@ -1077,45 +1077,45 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-01</th>\n", - " <td>0.230514</td>\n", - " <td>-0.285806</td>\n", - " <td>1.638132</td>\n", - " <td>0.354580</td>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>0.901094</td>\n", + " <td>1.056127</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>0.590918</td>\n", - " <td>-0.699672</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>0.879938</td>\n", + " <td>-0.051510</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.269837</td>\n", - " <td>-0.323344</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>1.794561</td>\n", + " <td>2.149620</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>0.778081</td>\n", - " <td>-0.229629</td>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>0.859853</td>\n", + " <td>1.700241</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>0.700101</td>\n", - " <td>1.277954</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.322952</td>\n", + " <td>-0.736119</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-06</th>\n", - " <td>-1.190543</td>\n", - " <td>0.910471</td>\n", - " <td>-0.416252</td>\n", - " <td>-0.072056</td>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>-0.113129</td>\n", + " 
<td>0.172290</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1123,15 +1123,15 @@ ], "text/plain": [ " A B C D\n", - "2017-09-01 0.230514 -0.285806 1.638132 0.354580\n", - "2017-09-02 1.466106 0.361433 0.590918 -0.699672\n", - "2017-09-03 0.858404 -0.208241 -0.269837 -0.323344\n", - "2017-09-04 0.268305 -0.780656 0.778081 -0.229629\n", - "2017-09-05 1.839360 1.148876 0.700101 1.277954\n", - "2017-09-06 -1.190543 0.910471 -0.416252 -0.072056" + "2017-09-01 -0.192859 0.329142 0.901094 1.056127\n", + "2017-09-02 -0.521965 -0.776431 0.879938 -0.051510\n", + "2017-09-03 0.826019 -0.445444 1.794561 2.149620\n", + "2017-09-04 0.378008 -0.550517 0.859853 1.700241\n", + "2017-09-05 -0.353283 2.524044 -0.322952 -0.736119\n", + "2017-09-06 0.684587 -0.493980 -0.113129 0.172290" ] }, - "execution_count": 121, + "execution_count": 490, "metadata": {}, "output_type": "execute_result" } @@ -1143,7 +1143,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 491, "metadata": {}, "outputs": [ { @@ -1175,46 +1175,46 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <th>2017-09-02</th>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", - " <th>2017-09-01</th>\n", - " <td>0.230514</td>\n", - " <td>-0.285806</td>\n", - " <td>0.354580</td>\n", - " <td>1.638132</td>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " <tr>\n", - " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", " </tr>\n", " <tr>\n", - " <th>2017-09-02</th>\n", - " 
<td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", - " <th>2017-09-06</th>\n", - " <td>-1.190543</td>\n", - " <td>0.910471</td>\n", - " <td>-0.072056</td>\n", - " <td>-0.416252</td>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", - " <td>0.700101</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1222,15 +1222,15 @@ ], "text/plain": [ " A B D C\n", - "2017-09-04 0.268305 -0.780656 -0.229629 0.778081\n", - "2017-09-01 0.230514 -0.285806 0.354580 1.638132\n", - "2017-09-03 0.858404 -0.208241 -0.323344 -0.269837\n", - "2017-09-02 1.466106 0.361433 -0.699672 0.590918\n", - "2017-09-06 -1.190543 0.910471 -0.072056 -0.416252\n", - "2017-09-05 1.839360 1.148876 1.277954 0.700101" + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561\n", + "2017-09-01 -0.192859 0.329142 1.056127 0.901094\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952" ] }, - "execution_count": 116, + "execution_count": 491, "metadata": {}, "output_type": "execute_result" } @@ -1256,22 +1256,22 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 492, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2017-09-01 0.230514\n", - "2017-09-02 1.466106\n", - "2017-09-03 0.858404\n", - "2017-09-04 0.268305\n", - "2017-09-05 1.839360\n", - "2017-09-06 -1.190543\n", + "2017-09-01 
-0.192859\n", + "2017-09-02 -0.521965\n", + "2017-09-03 0.826019\n", + "2017-09-04 0.378008\n", + "2017-09-05 -0.353283\n", + "2017-09-06 0.684587\n", "Freq: D, Name: A, dtype: float64" ] }, - "execution_count": 125, + "execution_count": 492, "metadata": {}, "output_type": "execute_result" } @@ -1283,7 +1283,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 493, "metadata": {}, "outputs": [ { @@ -1316,24 +1316,24 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1341,12 +1341,12 @@ ], "text/plain": [ " A B D C\n", - "2017-09-02 1.466106 0.361433 -0.699672 0.590918\n", - "2017-09-03 0.858404 -0.208241 -0.323344 -0.269837\n", - "2017-09-04 0.268305 -0.780656 -0.229629 0.778081" + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853" ] }, - "execution_count": 130, + "execution_count": 493, "metadata": {}, "output_type": "execute_result" } @@ -1358,7 +1358,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 494, "metadata": {}, "outputs": [ { @@ -1391,24 +1391,24 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - 
" <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1416,12 +1416,12 @@ ], "text/plain": [ " A B D C\n", - "2017-09-02 1.466106 0.361433 -0.699672 0.590918\n", - "2017-09-03 0.858404 -0.208241 -0.323344 -0.269837\n", - "2017-09-04 0.268305 -0.780656 -0.229629 0.778081" + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853" ] }, - "execution_count": 135, + "execution_count": 494, "metadata": {}, "output_type": "execute_result" } @@ -1433,20 +1433,20 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 495, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "A 1.466106\n", - "B 0.361433\n", - "D -0.699672\n", - "C 0.590918\n", + "A -0.521965\n", + "B -0.776431\n", + "D -0.051510\n", + "C 0.879938\n", "Name: 2017-09-02 00:00:00, dtype: float64" ] }, - "execution_count": 138, + "execution_count": 495, "metadata": {}, "output_type": "execute_result" } @@ -1458,7 +1458,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 496, "metadata": {}, "outputs": [ { @@ -1489,33 +1489,33 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-01</th>\n", - " <td>0.230514</td>\n", - " <td>-0.285806</td>\n", + " <td>-0.192859</td>\n", + " 
<td>0.329142</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>0.268305</td>\n", - " <td>-0.780656</td>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.839360</td>\n", - " <td>1.148876</td>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-06</th>\n", - " <td>-1.190543</td>\n", - " <td>0.910471</td>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1523,15 +1523,15 @@ ], "text/plain": [ " A B\n", - "2017-09-01 0.230514 -0.285806\n", - "2017-09-02 1.466106 0.361433\n", - "2017-09-03 0.858404 -0.208241\n", - "2017-09-04 0.268305 -0.780656\n", - "2017-09-05 1.839360 1.148876\n", - "2017-09-06 -1.190543 0.910471" + "2017-09-01 -0.192859 0.329142\n", + "2017-09-02 -0.521965 -0.776431\n", + "2017-09-03 0.826019 -0.445444\n", + "2017-09-04 0.378008 -0.550517\n", + "2017-09-05 -0.353283 2.524044\n", + "2017-09-06 0.684587 -0.493980" ] }, - "execution_count": 145, + "execution_count": 496, "metadata": {}, "output_type": "execute_result" } @@ -1543,7 +1543,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 497, "metadata": {}, "outputs": [ { @@ -1574,18 +1574,18 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>0.361433</td>\n", - " <td>0.590918</td>\n", + " <td>-0.776431</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>-0.208241</td>\n", - " <td>-0.269837</td>\n", + " <td>-0.445444</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>-0.780656</td>\n", - " 
<td>0.778081</td>\n", + " <td>-0.550517</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1593,12 +1593,12 @@ ], "text/plain": [ " B C\n", - "2017-09-02 0.361433 0.590918\n", - "2017-09-03 -0.208241 -0.269837\n", - "2017-09-04 -0.780656 0.778081" + "2017-09-02 -0.776431 0.879938\n", + "2017-09-03 -0.445444 1.794561\n", + "2017-09-04 -0.550517 0.859853" ] }, - "execution_count": 149, + "execution_count": 497, "metadata": {}, "output_type": "execute_result" } @@ -1610,18 +1610,18 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 498, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "A 1.466106\n", - "B 0.361433\n", + "A -0.521965\n", + "B -0.776431\n", "Name: 2017-09-02 00:00:00, dtype: float64" ] }, - "execution_count": 151, + "execution_count": 498, "metadata": {}, "output_type": "execute_result" } @@ -1633,16 +1633,16 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 499, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1.4661055188464265" + "-0.52196454176237395" ] }, - "execution_count": 153, + "execution_count": 499, "metadata": {}, "output_type": "execute_result" } @@ -1662,20 +1662,20 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 500, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "A 0.268305\n", - "B -0.780656\n", - "D -0.229629\n", - "C 0.778081\n", + "A 0.378008\n", + "B -0.550517\n", + "D 1.700241\n", + "C 0.859853\n", "Name: 2017-09-04 00:00:00, dtype: float64" ] }, - "execution_count": 157, + "execution_count": 500, "metadata": {}, "output_type": "execute_result" } @@ -1687,7 +1687,7 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": 501, "metadata": {}, "outputs": [ { @@ -1718,13 +1718,13 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>-0.229629</td>\n", - " <td>0.778081</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", " </tr>\n", " <tr>\n", " 
<th>2017-09-05</th>\n", - " <td>1.277954</td>\n", - " <td>0.700101</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1732,11 +1732,11 @@ ], "text/plain": [ " D C\n", - "2017-09-04 -0.229629 0.778081\n", - "2017-09-05 1.277954 0.700101" + "2017-09-04 1.700241 0.859853\n", + "2017-09-05 -0.736119 -0.322952" ] }, - "execution_count": 163, + "execution_count": 501, "metadata": {}, "output_type": "execute_result" } @@ -1748,7 +1748,7 @@ }, { "cell_type": "code", - "execution_count": 164, + "execution_count": 502, "metadata": {}, "outputs": [ { @@ -1781,29 +1781,29 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>1.466106</td>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", - " <td>0.590918</td>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.05151</td>\n", + " <td>0.879938</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>0.858404</td>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", - " <td>-0.269837</td>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.14962</td>\n", + " <td>1.794561</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ - " A B D C\n", - "2017-09-02 1.466106 0.361433 -0.699672 0.590918\n", - "2017-09-03 0.858404 -0.208241 -0.323344 -0.269837" + " A B D C\n", + "2017-09-02 -0.521965 -0.776431 -0.05151 0.879938\n", + "2017-09-03 0.826019 -0.445444 2.14962 1.794561" ] }, - "execution_count": 164, + "execution_count": 502, "metadata": {}, "output_type": "execute_result" } @@ -1814,7 +1814,7 @@ }, { "cell_type": "code", - "execution_count": 165, + "execution_count": 503, "metadata": {}, "outputs": [ { @@ -1845,33 +1845,33 @@ " <tbody>\n", " <tr>\n", " <th>2017-09-01</th>\n", - " <td>-0.285806</td>\n", - " <td>0.354580</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-02</th>\n", - " <td>0.361433</td>\n", - " <td>-0.699672</td>\n", + " 
<td>-0.776431</td>\n", + " <td>-0.051510</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-03</th>\n", - " <td>-0.208241</td>\n", - " <td>-0.323344</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-04</th>\n", - " <td>-0.780656</td>\n", - " <td>-0.229629</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-05</th>\n", - " <td>1.148876</td>\n", - " <td>1.277954</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", " </tr>\n", " <tr>\n", " <th>2017-09-06</th>\n", - " <td>0.910471</td>\n", - " <td>-0.072056</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", @@ -1879,15 +1879,15 @@ ], "text/plain": [ " B D\n", - "2017-09-01 -0.285806 0.354580\n", - "2017-09-02 0.361433 -0.699672\n", - "2017-09-03 -0.208241 -0.323344\n", - "2017-09-04 -0.780656 -0.229629\n", - "2017-09-05 1.148876 1.277954\n", - "2017-09-06 0.910471 -0.072056" + "2017-09-01 0.329142 1.056127\n", + "2017-09-02 -0.776431 -0.051510\n", + "2017-09-03 -0.445444 2.149620\n", + "2017-09-04 -0.550517 1.700241\n", + "2017-09-05 2.524044 -0.736119\n", + "2017-09-06 -0.493980 0.172290" ] }, - "execution_count": 165, + "execution_count": 503, "metadata": {}, "output_type": "execute_result" } @@ -1898,16 +1898,16 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": 504, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.36143299164038073" + "-0.77643072890241949" ] }, - "execution_count": 166, + "execution_count": 504, "metadata": {}, "output_type": "execute_result" } @@ -1916,6 +1916,3103 @@ "df.iloc[1,1]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Boolean indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 505, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + 
"\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129" + ] + }, + "execution_count": 505, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using a single column’s values to select data.\n", + "df[df.A > 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 506, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " 
<th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>NaN</td>\n", + " <td>-0.445444</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>NaN</td>\n", + " <td>-0.550517</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>NaN</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>NaN</td>\n", + " <td>-0.493980</td>\n", + " <td>NaN</td>\n", + " <td>-0.113129</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C\n", + "2017-09-01 -0.192859 NaN NaN NaN\n", + "2017-09-02 -0.521965 -0.776431 -0.051510 NaN\n", + "2017-09-03 NaN -0.445444 NaN NaN\n", + "2017-09-04 NaN -0.550517 NaN NaN\n", + "2017-09-05 -0.353283 NaN -0.736119 -0.322952\n", + "2017-09-06 NaN -0.493980 NaN -0.113129" + ] + }, + "execution_count": 506, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Selecting values from a DataFrame where a boolean condition is met.\n", + "df[df < 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 507, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " 
<th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>E</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", + " <td>one</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", + " <td>one</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", + " <td>two</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", + " <td>three</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", + " <td>four</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", + " <td>three</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C E\n", + "2017-09-01 -0.192859 0.329142 1.056127 0.901094 one\n", + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938 one\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561 two\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853 three\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952 four\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129 three" + ] + }, + "execution_count": 507, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create new copy and add extra column\n", + "df3 = df.copy()\n", + "df3['E'] = ['one', 'one','two','three','four','three']\n", + "df3" + ] + }, + { + "cell_type": "code", + "execution_count": 508, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>E</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", + " <td>two</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", + " <td>four</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C E\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561 two\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952 four" + ] + }, + "execution_count": 508, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Use isin() filtering\n", + "df3[df3['E'].isin(['two','four'])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modifying data" + ] + }, + { + "cell_type": "code", + "execution_count": 509, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2017-09-02 1\n", + "2017-09-03 2\n", + "2017-09-04 3\n", + "2017-09-05 4\n", + "2017-09-06 5\n", + "2017-09-07 6\n", + "Freq: D, dtype: int64" + ] + }, + "execution_count": 509, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20170902', periods=6))\n", + "\n", + "s1" + ] + }, + { + "cell_type": "code", + "execution_count": 510, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>-0.521965</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", + " <td>3.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F\n", + "2017-09-01 -0.192859 0.329142 1.056127 0.901094 NaN\n", + "2017-09-02 -0.521965 -0.776431 -0.051510 0.879938 1.0\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561 2.0\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853 
3.0\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952 4.0\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129 5.0" + ] + }, + "execution_count": 510, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add column F, align by original index\n", + "df['F'] = s1\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 511, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.329142</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.000000</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", + " <td>3.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " 
<td>0.172290</td>\n", + " <td>-0.113129</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F\n", + "2017-09-01 -0.192859 0.329142 1.056127 0.901094 NaN\n", + "2017-09-02 0.000000 -0.776431 -0.051510 0.879938 1.0\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561 2.0\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853 3.0\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952 4.0\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129 5.0" + ] + }, + "execution_count": 511, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Setting values by label\n", + "df.at['20170902','A'] = 0\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 512, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.000000</td>\n", + " <td>1.056127</td>\n", + " <td>0.901094</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.000000</td>\n", + " <td>-0.776431</td>\n", + " <td>-0.051510</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>2.149620</td>\n", + " <td>1.794561</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " 
<td>-0.550517</td>\n", + " <td>1.700241</td>\n", + " <td>0.859853</td>\n", + " <td>3.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>-0.736119</td>\n", + " <td>-0.322952</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>0.172290</td>\n", + " <td>-0.113129</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F\n", + "2017-09-01 -0.192859 0.000000 1.056127 0.901094 NaN\n", + "2017-09-02 0.000000 -0.776431 -0.051510 0.879938 1.0\n", + "2017-09-03 0.826019 -0.445444 2.149620 1.794561 2.0\n", + "2017-09-04 0.378008 -0.550517 1.700241 0.859853 3.0\n", + "2017-09-05 -0.353283 2.524044 -0.736119 -0.322952 4.0\n", + "2017-09-06 0.684587 -0.493980 0.172290 -0.113129 5.0" + ] + }, + "execution_count": 512, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iat[0,1] = 0\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 513, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.000000</td>\n", + " <td>5</td>\n", + " <td>0.901094</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.000000</td>\n", + " <td>-0.776431</td>\n", + " 
<td>5</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>5</td>\n", + " <td>1.794561</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>5</td>\n", + " <td>0.859853</td>\n", + " <td>3.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>2.524044</td>\n", + " <td>5</td>\n", + " <td>-0.322952</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>5</td>\n", + " <td>-0.113129</td>\n", + " <td>5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F\n", + "2017-09-01 -0.192859 0.000000 5 0.901094 NaN\n", + "2017-09-02 0.000000 -0.776431 5 0.879938 1.0\n", + "2017-09-03 0.826019 -0.445444 5 1.794561 2.0\n", + "2017-09-04 0.378008 -0.550517 5 0.859853 3.0\n", + "2017-09-05 -0.353283 2.524044 5 -0.322952 4.0\n", + "2017-09-06 0.684587 -0.493980 5 -0.113129 5.0" + ] + }, + "execution_count": 513, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Setting a column based on a numpy array\n", + "df.loc[:,'D'] = np.array([5] * len(df))\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting with where()" + ] + }, + { + "cell_type": "code", + "execution_count": 514, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " 
<th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.000000</td>\n", + " <td>-5</td>\n", + " <td>-0.901094</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.000000</td>\n", + " <td>-0.776431</td>\n", + " <td>-5</td>\n", + " <td>-0.879938</td>\n", + " <td>-1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>-0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>-5</td>\n", + " <td>-1.794561</td>\n", + " <td>-2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>-0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>-5</td>\n", + " <td>-0.859853</td>\n", + " <td>-3.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>-0.353283</td>\n", + " <td>-2.524044</td>\n", + " <td>-5</td>\n", + " <td>-0.322952</td>\n", + " <td>-4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " <td>-0.684587</td>\n", + " <td>-0.493980</td>\n", + " <td>-5</td>\n", + " <td>-0.113129</td>\n", + " <td>-5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F\n", + "2017-09-01 -0.192859 0.000000 -5 -0.901094 NaN\n", + "2017-09-02 0.000000 -0.776431 -5 -0.879938 -1.0\n", + "2017-09-03 -0.826019 -0.445444 -5 -1.794561 -2.0\n", + "2017-09-04 -0.378008 -0.550517 -5 -0.859853 -3.0\n", + "2017-09-05 -0.353283 -2.524044 -5 -0.322952 -4.0\n", + "2017-09-06 -0.684587 -0.493980 -5 -0.113129 -5.0" + ] + }, + "execution_count": 514, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df.copy()\n", + "df2[df2 > 0] = -df2\n", + "df2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Missing data\n", + "\n", + "- pandas uses the value np.nan to represent missing data\n", + "- It is by default not 
included in computations. " + ] + }, + { + "cell_type": "code", + "execution_count": 515, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " <th>E</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.000000</td>\n", + " <td>5</td>\n", + " <td>0.901094</td>\n", + " <td>NaN</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.000000</td>\n", + " <td>-0.776431</td>\n", + " <td>5</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>5</td>\n", + " <td>1.794561</td>\n", + " <td>2.0</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>5</td>\n", + " <td>0.859853</td>\n", + " <td>3.0</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F E\n", + "2017-09-01 -0.192859 0.000000 5 0.901094 NaN 1.0\n", + "2017-09-02 0.000000 -0.776431 5 0.879938 1.0 1.0\n", + "2017-09-03 0.826019 -0.445444 5 1.794561 2.0 NaN\n", + "2017-09-04 0.378008 -0.550517 5 0.859853 3.0 NaN" + ] + }, + "execution_count": 515, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# reindex (copy) a subset of the data and add an empty column E\n", + "df1 = 
df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])\n", + "# Set E to 1 for first two rows\n", + "df1.loc[dates[0]:dates[1],'E'] = 1\n", + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 516, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " <th>E</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.0</td>\n", + " <td>-0.776431</td>\n", + " <td>5</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F E\n", + "2017-09-02 0.0 -0.776431 5 0.879938 1.0 1.0" + ] + }, + "execution_count": 516, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Drop all rows that have any unknown values\n", + "df1.dropna(how='any')" + ] + }, + { + "cell_type": "code", + "execution_count": 517, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " 
<th>E</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>-0.192859</td>\n", + " <td>0.000000</td>\n", + " <td>5</td>\n", + " <td>0.901094</td>\n", + " <td>42.0</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>0.000000</td>\n", + " <td>-0.776431</td>\n", + " <td>5</td>\n", + " <td>0.879938</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>0.826019</td>\n", + " <td>-0.445444</td>\n", + " <td>5</td>\n", + " <td>1.794561</td>\n", + " <td>2.0</td>\n", + " <td>42.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>0.378008</td>\n", + " <td>-0.550517</td>\n", + " <td>5</td>\n", + " <td>0.859853</td>\n", + " <td>3.0</td>\n", + " <td>42.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F E\n", + "2017-09-01 -0.192859 0.000000 5 0.901094 42.0 1.0\n", + "2017-09-02 0.000000 -0.776431 5 0.879938 1.0 1.0\n", + "2017-09-03 0.826019 -0.445444 5 1.794561 2.0 42.0\n", + "2017-09-04 0.378008 -0.550517 5 0.859853 3.0 42.0" + ] + }, + "execution_count": 517, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Replace NA with value\n", + "df1.fillna(value=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 518, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " <th>E</th>\n", + " </tr>\n", + " </thead>\n", + 
" <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F E\n", + "2017-09-01 False False False False True False\n", + "2017-09-02 False False False False False False\n", + "2017-09-03 False False False False False True\n", + "2017-09-04 False False False False False True" + ] + }, + "execution_count": 518, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Show the boolean mask\n", + "pd.isnull(df1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Operations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basic (stat) operators" + ] + }, + { + "cell_type": "code", + "execution_count": 519, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 0.223745\n", + "B 0.042945\n", + "D 5.000000\n", + "C 0.666561\n", + "F 3.000000\n", + "dtype: float64" + ] + }, + "execution_count": 519, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Mean per column\n", + "df.mean() # similar to axis=0" + ] + }, + { + "cell_type": "code", + "execution_count": 520, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2017-09-01 1.427059\n", + 
"2017-09-02 1.220702\n", + "2017-09-03 1.835027\n", + "2017-09-04 1.737469\n", + "2017-09-05 2.169562\n", + "2017-09-06 2.015496\n", + "Freq: D, dtype: float64" + ] + }, + "execution_count": 520, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Mean per row\n", + "df.mean(axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Apply" + ] + }, + { + "cell_type": "code", + "execution_count": 521, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>D</th>\n", + " <th>C</th>\n", + " <th>F</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2017-09-01</th>\n", + " <td>0.192859</td>\n", + " <td>-0.000000</td>\n", + " <td>-5</td>\n", + " <td>-0.901094</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-02</th>\n", + " <td>-0.000000</td>\n", + " <td>0.776431</td>\n", + " <td>-5</td>\n", + " <td>-0.879938</td>\n", + " <td>-1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-03</th>\n", + " <td>-0.826019</td>\n", + " <td>0.445444</td>\n", + " <td>-5</td>\n", + " <td>-1.794561</td>\n", + " <td>-2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-04</th>\n", + " <td>-0.378008</td>\n", + " <td>0.550517</td>\n", + " <td>-5</td>\n", + " <td>-0.859853</td>\n", + " <td>-3.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-05</th>\n", + " <td>0.353283</td>\n", + " <td>-2.524044</td>\n", + " <td>-5</td>\n", + " <td>0.322952</td>\n", + " <td>-4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2017-09-06</th>\n", + " 
<td>-0.684587</td>\n", + " <td>0.493980</td>\n", + " <td>-5</td>\n", + " <td>0.113129</td>\n", + " <td>-5.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B D C F\n", + "2017-09-01 0.192859 -0.000000 -5 -0.901094 NaN\n", + "2017-09-02 -0.000000 0.776431 -5 -0.879938 -1.0\n", + "2017-09-03 -0.826019 0.445444 -5 -1.794561 -2.0\n", + "2017-09-04 -0.378008 0.550517 -5 -0.859853 -3.0\n", + "2017-09-05 0.353283 -2.524044 -5 0.322952 -4.0\n", + "2017-09-06 -0.684587 0.493980 -5 0.113129 -5.0" + ] + }, + "execution_count": 521, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create my own function that returns the negated value\n", + "def my_func(val):\n", + " return -val\n", + "# Apply my function to all values\n", + "df.apply(my_func)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Histogramming" + ] + }, + { + "cell_type": "code", + "execution_count": 522, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 1\n", + "2 2\n", + "3 2\n", + "4 5\n", + "5 5\n", + "6 1\n", + "7 1\n", + "8 2\n", + "9 3\n", + "dtype: int64" + ] + }, + "execution_count": 522, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = pd.Series(np.random.randint(0, 7, size=10))\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 523, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2 4\n", + "1 3\n", + "5 2\n", + "3 1\n", + "dtype: int64" + ] + }, + "execution_count": 523, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concatenating data" + ] + }, + { + "cell_type": "code", + "execution_count": 524, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", +
"\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-0.330694</td>\n", + " <td>0.872756</td>\n", + " <td>2.112683</td>\n", + " <td>-1.388585</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.057223</td>\n", + " <td>0.618802</td>\n", + " <td>-0.914080</td>\n", + " <td>0.252215</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-1.139711</td>\n", + " <td>0.816206</td>\n", + " <td>1.008724</td>\n", + " <td>-1.672792</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.072709</td>\n", + " <td>0.074521</td>\n", + " <td>-0.089053</td>\n", + " <td>-0.132791</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0.502954</td>\n", + " <td>1.068275</td>\n", + " <td>0.415517</td>\n", + " <td>1.218068</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>-1.710858</td>\n", + " <td>-0.176961</td>\n", + " <td>1.161903</td>\n", + " <td>-1.412063</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0.876638</td>\n", + " <td>-0.302195</td>\n", + " <td>-1.360723</td>\n", + " <td>-2.326562</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>-0.888707</td>\n", + " <td>-0.609472</td>\n", + " <td>-0.241537</td>\n", + " <td>-1.974217</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>-0.301339</td>\n", + " <td>-0.329511</td>\n", + " <td>-0.040340</td>\n", + " <td>0.168655</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>-0.432671</td>\n", + " <td>-0.518209</td>\n", + " <td>1.575212</td>\n", + " <td>-0.901535</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + 
"text/plain": [ + " 0 1 2 3\n", + "0 -0.330694 0.872756 2.112683 -1.388585\n", + "1 0.057223 0.618802 -0.914080 0.252215\n", + "2 -1.139711 0.816206 1.008724 -1.672792\n", + "3 0.072709 0.074521 -0.089053 -0.132791\n", + "4 0.502954 1.068275 0.415517 1.218068\n", + "5 -1.710858 -0.176961 1.161903 -1.412063\n", + "6 0.876638 -0.302195 -1.360723 -2.326562\n", + "7 -0.888707 -0.609472 -0.241537 -1.974217\n", + "8 -0.301339 -0.329511 -0.040340 0.168655\n", + "9 -0.432671 -0.518209 1.575212 -0.901535" + ] + }, + "execution_count": 524, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.random.randn(10, 4))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 525, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ 0 1 2 3\n", + " 0 -0.330694 0.872756 2.112683 -1.388585\n", + " 1 0.057223 0.618802 -0.914080 0.252215\n", + " 2 -1.139711 0.816206 1.008724 -1.672792,\n", + " 0 1 2 3\n", + " 3 0.072709 0.074521 -0.089053 -0.132791\n", + " 4 0.502954 1.068275 0.415517 1.218068\n", + " 5 -1.710858 -0.176961 1.161903 -1.412063\n", + " 6 0.876638 -0.302195 -1.360723 -2.326562,\n", + " 0 1 2 3\n", + " 7 -0.888707 -0.609472 -0.241537 -1.974217\n", + " 8 -0.301339 -0.329511 -0.040340 0.168655\n", + " 9 -0.432671 -0.518209 1.575212 -0.901535]" + ] + }, + "execution_count": 525, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chunks = [df[:3], df[3:7], df[7:]]\n", + "chunks" + ] + }, + { + "cell_type": "code", + "execution_count": 526, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: 
right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-0.330694</td>\n", + " <td>0.872756</td>\n", + " <td>2.112683</td>\n", + " <td>-1.388585</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.057223</td>\n", + " <td>0.618802</td>\n", + " <td>-0.914080</td>\n", + " <td>0.252215</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-1.139711</td>\n", + " <td>0.816206</td>\n", + " <td>1.008724</td>\n", + " <td>-1.672792</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.072709</td>\n", + " <td>0.074521</td>\n", + " <td>-0.089053</td>\n", + " <td>-0.132791</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0.502954</td>\n", + " <td>1.068275</td>\n", + " <td>0.415517</td>\n", + " <td>1.218068</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>-1.710858</td>\n", + " <td>-0.176961</td>\n", + " <td>1.161903</td>\n", + " <td>-1.412063</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0.876638</td>\n", + " <td>-0.302195</td>\n", + " <td>-1.360723</td>\n", + " <td>-2.326562</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>-0.888707</td>\n", + " <td>-0.609472</td>\n", + " <td>-0.241537</td>\n", + " <td>-1.974217</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>-0.301339</td>\n", + " <td>-0.329511</td>\n", + " <td>-0.040340</td>\n", + " <td>0.168655</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>-0.432671</td>\n", + " <td>-0.518209</td>\n", + " <td>1.575212</td>\n", + " <td>-0.901535</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3\n", + "0 -0.330694 0.872756 2.112683 -1.388585\n", + "1 0.057223 0.618802 -0.914080 0.252215\n", + "2 -1.139711 0.816206 1.008724 -1.672792\n", + "3 0.072709 0.074521 -0.089053 -0.132791\n", + "4 0.502954 1.068275 0.415517 1.218068\n", + "5 
-1.710858 -0.176961 1.161903 -1.412063\n", + "6 0.876638 -0.302195 -1.360723 -2.326562\n", + "7 -0.888707 -0.609472 -0.241537 -1.974217\n", + "8 -0.301339 -0.329511 -0.040340 0.168655\n", + "9 -0.432671 -0.518209 1.575212 -0.901535" + ] + }, + "execution_count": 526, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat(chunks)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Joining data" + ] + }, + { + "cell_type": "code", + "execution_count": 527, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})\n", + "right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})" + ] + }, + { + "cell_type": "code", + "execution_count": 528, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>key</th>\n", + " <th>lval</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>foo</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>foo</td>\n", + " <td>2</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " key lval\n", + "0 foo 1\n", + "1 foo 2" + ] + }, + "execution_count": 528, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left" + ] + }, + { + "cell_type": "code", + "execution_count": 529, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " 
.dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>key</th>\n", + " <th>rval</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>foo</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>foo</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " key rval\n", + "0 foo 4\n", + "1 foo 5" + ] + }, + "execution_count": 529, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "right" + ] + }, + { + "cell_type": "code", + "execution_count": 530, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>key</th>\n", + " <th>lval</th>\n", + " <th>rval</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>foo</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>foo</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>foo</td>\n", + " <td>2</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>foo</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " key lval rval\n", + "0 foo 1 4\n", + "1 foo 1 5\n", + "2 foo 2 4\n", + "3 foo 2 5" + ] + 
}, + "execution_count": 530, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(left, right, on='key')" + ] + }, + { + "cell_type": "code", + "execution_count": 531, + "metadata": {}, + "outputs": [], + "source": [ + "left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})\n", + "right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})" + ] + }, + { + "cell_type": "code", + "execution_count": 532, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>key</th>\n", + " <th>lval</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>foo</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>bar</td>\n", + " <td>2</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " key lval\n", + "0 foo 1\n", + "1 bar 2" + ] + }, + "execution_count": 532, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "left" + ] + }, + { + "cell_type": "code", + "execution_count": 533, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>key</th>\n", + " <th>rval</th>\n", + " </tr>\n", + " 
</thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>foo</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>bar</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " key rval\n", + "0 foo 4\n", + "1 bar 5" + ] + }, + "execution_count": 533, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "right" + ] + }, + { + "cell_type": "code", + "execution_count": 534, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>key</th>\n", + " <th>lval</th>\n", + " <th>rval</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>foo</td>\n", + " <td>1</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>bar</td>\n", + " <td>2</td>\n", + " <td>5</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " key lval rval\n", + "0 foo 1 4\n", + "1 bar 2 5" + ] + }, + "execution_count": 534, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(left, right, on='key')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Appending rows" + ] + }, + { + "cell_type": "code", + "execution_count": 535, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + 
" vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " <th>D</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1.460591</td>\n", + " <td>-1.167721</td>\n", + " <td>-0.838375</td>\n", + " <td>-1.239544</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1.670965</td>\n", + " <td>2.563498</td>\n", + " <td>-0.103847</td>\n", + " <td>-1.104923</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-1.515473</td>\n", + " <td>0.327335</td>\n", + " <td>0.329841</td>\n", + " <td>-1.149319</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.421181</td>\n", + " <td>1.607888</td>\n", + " <td>-0.011656</td>\n", + " <td>-0.166280</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1.967833</td>\n", + " <td>0.506429</td>\n", + " <td>1.675263</td>\n", + " <td>0.763151</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>-1.781919</td>\n", + " <td>-1.503685</td>\n", + " <td>0.366258</td>\n", + " <td>-1.276055</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>-0.222054</td>\n", + " <td>1.231713</td>\n", + " <td>-0.358001</td>\n", + " <td>-0.407226</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>0.717591</td>\n", + " <td>-0.060166</td>\n", + " <td>0.008006</td>\n", + " <td>-0.513181</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C D\n", + "0 1.460591 -1.167721 -0.838375 -1.239544\n", + "1 1.670965 2.563498 -0.103847 -1.104923\n", + "2 -1.515473 0.327335 0.329841 -1.149319\n", + "3 0.421181 1.607888 -0.011656 -0.166280\n", + "4 1.967833 0.506429 1.675263 0.763151\n", + "5 -1.781919 -1.503685 0.366258 -1.276055\n", + "6 -0.222054 1.231713 -0.358001 -0.407226\n", + "7 0.717591 -0.060166 0.008006 -0.513181" + ] + }, + 
"execution_count": 535, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 536, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 0.421181\n", + "B 1.607888\n", + "C -0.011656\n", + "D -0.166280\n", + "Name: 3, dtype: float64" + ] + }, + "execution_count": 536, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = df.iloc[3]\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 537, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " <th>D</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1.460591</td>\n", + " <td>-1.167721</td>\n", + " <td>-0.838375</td>\n", + " <td>-1.239544</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1.670965</td>\n", + " <td>2.563498</td>\n", + " <td>-0.103847</td>\n", + " <td>-1.104923</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-1.515473</td>\n", + " <td>0.327335</td>\n", + " <td>0.329841</td>\n", + " <td>-1.149319</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.421181</td>\n", + " <td>1.607888</td>\n", + " <td>-0.011656</td>\n", + " <td>-0.166280</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1.967833</td>\n", + " <td>0.506429</td>\n", + " <td>1.675263</td>\n", + " <td>0.763151</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " 
<td>-1.781919</td>\n", + " <td>-1.503685</td>\n", + " <td>0.366258</td>\n", + " <td>-1.276055</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>-0.222054</td>\n", + " <td>1.231713</td>\n", + " <td>-0.358001</td>\n", + " <td>-0.407226</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>0.717591</td>\n", + " <td>-0.060166</td>\n", + " <td>0.008006</td>\n", + " <td>-0.513181</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.421181</td>\n", + " <td>1.607888</td>\n", + " <td>-0.011656</td>\n", + " <td>-0.166280</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C D\n", + "0 1.460591 -1.167721 -0.838375 -1.239544\n", + "1 1.670965 2.563498 -0.103847 -1.104923\n", + "2 -1.515473 0.327335 0.329841 -1.149319\n", + "3 0.421181 1.607888 -0.011656 -0.166280\n", + "4 1.967833 0.506429 1.675263 0.763151\n", + "5 -1.781919 -1.503685 0.366258 -1.276055\n", + "6 -0.222054 1.231713 -0.358001 -0.407226\n", + "7 0.717591 -0.060166 0.008006 -0.513181\n", + "3 0.421181 1.607888 -0.011656 -0.166280" + ] + }, + "execution_count": 537, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.append(s, ignore_index=False).reindex()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Grouping" + ] + }, + { + "cell_type": "code", + "execution_count": 538, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " <th>D</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " 
<th>0</th>\n", + " <td>foo</td>\n", + " <td>one</td>\n", + " <td>0.776501</td>\n", + " <td>1.452913</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>bar</td>\n", + " <td>one</td>\n", + " <td>-0.731479</td>\n", + " <td>-0.924116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>foo</td>\n", + " <td>two</td>\n", + " <td>-0.204101</td>\n", + " <td>0.044840</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>bar</td>\n", + " <td>three</td>\n", + " <td>-0.343739</td>\n", + " <td>-0.683006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>foo</td>\n", + " <td>two</td>\n", + " <td>-1.519019</td>\n", + " <td>0.103475</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>bar</td>\n", + " <td>two</td>\n", + " <td>1.271579</td>\n", + " <td>-2.176090</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>foo</td>\n", + " <td>one</td>\n", + " <td>-0.741659</td>\n", + " <td>-0.561117</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>foo</td>\n", + " <td>three</td>\n", + " <td>-0.174572</td>\n", + " <td>1.281778</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C D\n", + "0 foo one 0.776501 1.452913\n", + "1 bar one -0.731479 -0.924116\n", + "2 foo two -0.204101 0.044840\n", + "3 bar three -0.343739 -0.683006\n", + "4 foo two -1.519019 0.103475\n", + "5 bar two 1.271579 -2.176090\n", + "6 foo one -0.741659 -0.561117\n", + "7 foo three -0.174572 1.281778" + ] + }, + "execution_count": 538, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a DataFrame with two categorical columns (A, B) and two random numeric columns (C, D)\n", + "df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',\n", + " 'foo', 'bar', 'foo', 'foo'],\n", + " 'B' : ['one', 'one', 'two', 'three',\n", + " 'two', 'two', 'one', 'three'],\n", + " 'C' : np.random.randn(8),\n", + " 'D' : np.random.randn(8)})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 539, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>C</th>\n", + " <th>D</th>\n", + " </tr>\n", + " <tr>\n", + " <th>A</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>bar</th>\n", + " <td>0.196361</td>\n", + " <td>-3.783212</td>\n", + " </tr>\n", + " <tr>\n", + " <th>foo</th>\n", + " <td>-1.862851</td>\n", + " <td>2.321889</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " C D\n", + "A \n", + "bar 0.196361 -3.783212\n", + "foo -1.862851 2.321889" + ] + }, + "execution_count": 539, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('A').sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 540, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th></th>\n", + " <th>C</th>\n", + " <th>D</th>\n", + " </tr>\n", + " <tr>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">bar</th>\n", + " <th>one</th>\n", + " <td>-0.731479</td>\n", + " <td>-0.924116</td>\n", + " </tr>\n", + " <tr>\n", + " <th>three</th>\n", + " 
<td>-0.343739</td>\n", + " <td>-0.683006</td>\n", + " </tr>\n", + " <tr>\n", + " <th>two</th>\n", + " <td>1.271579</td>\n", + " <td>-2.176090</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">foo</th>\n", + " <th>one</th>\n", + " <td>0.034842</td>\n", + " <td>0.891796</td>\n", + " </tr>\n", + " <tr>\n", + " <th>three</th>\n", + " <td>-0.174572</td>\n", + " <td>1.281778</td>\n", + " </tr>\n", + " <tr>\n", + " <th>two</th>\n", + " <td>-1.723120</td>\n", + " <td>0.148315</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " C D\n", + "A B \n", + "bar one -0.731479 -0.924116\n", + " three -0.343739 -0.683006\n", + " two 1.271579 -2.176090\n", + "foo one 0.034842 0.891796\n", + " three -0.174572 1.281778\n", + " two -1.723120 0.148315" + ] + }, + "execution_count": 540, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['A','B']).sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reshaping using pivot_table" + ] + }, + { + "cell_type": "code", + "execution_count": 541, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " <th>D</th>\n", + " <th>E</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>one</td>\n", + " <td>X</td>\n", + " <td>foo</td>\n", + " <td>1.057110</td>\n", + " <td>-0.108982</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>one</td>\n", + " <td>Y</td>\n", + " <td>foo</td>\n", + " 
<td>1.037212</td>\n", + " <td>0.477061</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>two</td>\n", + " <td>Z</td>\n", + " <td>foo</td>\n", + " <td>1.375108</td>\n", + " <td>0.205573</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>three</td>\n", + " <td>X</td>\n", + " <td>bar</td>\n", + " <td>-0.383392</td>\n", + " <td>1.279249</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>one</td>\n", + " <td>Y</td>\n", + " <td>bar</td>\n", + " <td>0.378141</td>\n", + " <td>0.166755</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>one</td>\n", + " <td>Z</td>\n", + " <td>bar</td>\n", + " <td>-0.528378</td>\n", + " <td>0.405889</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>two</td>\n", + " <td>X</td>\n", + " <td>foo</td>\n", + " <td>0.406508</td>\n", + " <td>0.054299</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>three</td>\n", + " <td>Y</td>\n", + " <td>foo</td>\n", + " <td>-0.937294</td>\n", + " <td>0.251400</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>one</td>\n", + " <td>Z</td>\n", + " <td>foo</td>\n", + " <td>0.750334</td>\n", + " <td>0.880497</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>one</td>\n", + " <td>X</td>\n", + " <td>bar</td>\n", + " <td>0.827021</td>\n", + " <td>-0.148973</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>two</td>\n", + " <td>Y</td>\n", + " <td>bar</td>\n", + " <td>-0.143651</td>\n", + " <td>-0.729501</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>three</td>\n", + " <td>Z</td>\n", + " <td>bar</td>\n", + " <td>-0.205862</td>\n", + " <td>1.432239</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C D E\n", + "0 one X foo 1.057110 -0.108982\n", + "1 one Y foo 1.037212 0.477061\n", + "2 two Z foo 1.375108 0.205573\n", + "3 three X bar -0.383392 1.279249\n", + "4 one Y bar 0.378141 0.166755\n", + "5 one Z bar -0.528378 0.405889\n", + "6 two X foo 
0.406508 0.054299\n", + "7 three Y foo -0.937294 0.251400\n", + "8 one Z foo 0.750334 0.880497\n", + "9 one X bar 0.827021 -0.148973\n", + "10 two Y bar -0.143651 -0.729501\n", + "11 three Z bar -0.205862 1.432239" + ] + }, + "execution_count": 541, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a flat table with duplicated entries\n", + "df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,\n", + " 'B' : ['X', 'Y', 'Z'] * 4,\n", + " 'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,\n", + " 'D' : np.random.randn(12),\n", + " 'E' : np.random.randn(12)})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 542, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style>\n", + " .dataframe thead tr:only-child th {\n", + " text-align: right;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: left;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>C</th>\n", + " <th>bar</th>\n", + " <th>foo</th>\n", + " </tr>\n", + " <tr>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">one</th>\n", + " <th>X</th>\n", + " <td>0.827021</td>\n", + " <td>1.057110</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Y</th>\n", + " <td>0.378141</td>\n", + " <td>1.037212</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Z</th>\n", + " <td>-0.528378</td>\n", + " <td>0.750334</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">three</th>\n", + " <th>X</th>\n", + " <td>-0.383392</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Y</th>\n", + " <td>NaN</td>\n", + " <td>-0.937294</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Z</th>\n", + " 
<td>-0.205862</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th rowspan=\"3\" valign=\"top\">two</th>\n", + " <th>X</th>\n", + " <td>NaN</td>\n", + " <td>0.406508</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Y</th>\n", + " <td>-0.143651</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Z</th>\n", + " <td>NaN</td>\n", + " <td>1.375108</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + "C bar foo\n", + "A B \n", + "one X 0.827021 1.057110\n", + " Y 0.378141 1.037212\n", + " Z -0.528378 0.750334\n", + "three X -0.383392 NaN\n", + " Y NaN -0.937294\n", + " Z -0.205862 NaN\n", + "two X NaN 0.406508\n", + " Y -0.143651 NaN\n", + " Z NaN 1.375108" + ] + }, + "execution_count": 542, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a pivot table using A and B as the index, making C columns, and using D as the values (E is not used)\n", + "pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Further information\n", + "- https://pandas.pydata.org/pandas-docs/stable/index.html\n", + "- https://stackoverflow.com with pandas tag" + ] + }, { "cell_type": "code", "execution_count": null,