|
4 | 4 | "cell_type": "markdown", |
5 | 5 | "metadata": {}, |
6 | 6 | "source": [ |
7 | | - "# pivot versus pivot_tabel" |
| 7 | + "# pivot versus pivot_table" |
8 | 8 | ] |
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | | - "execution_count": 13, |
| 12 | + "execution_count": 1, |
13 | 13 | "metadata": {}, |
14 | 14 | "outputs": [], |
15 | 15 | "source": [ |
|
24 | 24 | "pandas has two functions to restructure dataframes. Although they are similar, each has its own applications." |
25 | 25 | ] |
26 | 26 | }, |
| 27 | + { |
| 28 | + "cell_type": "markdown", |
| 29 | + "metadata": {}, |
| 30 | + "source": [ |
| 31 | + "## Data" |
| 32 | + ] |
| 33 | + }, |
27 | 34 | { |
28 | 35 | "cell_type": "markdown", |
29 | 36 | "metadata": {}, |
|
33 | 40 | }, |
34 | 41 | { |
35 | 42 | "cell_type": "code", |
36 | | - "execution_count": 14, |
| 43 | + "execution_count": 2, |
37 | 44 | "metadata": {}, |
38 | 45 | "outputs": [], |
39 | 46 | "source": [ |
|
44 | 51 | }, |
45 | 52 | { |
46 | 53 | "cell_type": "code", |
47 | | - "execution_count": 15, |
| 54 | + "execution_count": 3, |
48 | 55 | "metadata": {}, |
49 | 56 | "outputs": [], |
50 | 57 | "source": [ |
|
62 | 69 | }, |
63 | 70 | { |
64 | 71 | "cell_type": "code", |
65 | | - "execution_count": 16, |
| 72 | + "execution_count": 4, |
66 | 73 | "metadata": {}, |
67 | 74 | "outputs": [], |
68 | 75 | "source": [ |
|
71 | 78 | }, |
72 | 79 | { |
73 | 80 | "cell_type": "code", |
74 | | - "execution_count": 17, |
| 81 | + "execution_count": 5, |
75 | 82 | "metadata": {}, |
76 | 83 | "outputs": [ |
77 | 84 | { |
78 | 85 | "name": "stdout", |
79 | 86 | "output_type": "stream", |
80 | 87 | "text": [ |
81 | 88 | "<class 'pandas.core.frame.DataFrame'>\n", |
82 | | - "Int64Index: 62 entries, 0 to 61\n", |
| 89 | + "RangeIndex: 62 entries, 0 to 61\n", |
83 | 90 | "Data columns (total 6 columns):\n", |
84 | 91 | " # Column Non-Null Count Dtype \n", |
85 | 92 | "--- ------ -------------- ----- \n", |
|
90 | 97 | " 4 gender 55 non-null category \n", |
91 | 98 | " 5 condition 55 non-null category \n", |
92 | 99 | "dtypes: category(2), datetime64[ns](1), float32(2), int64(1)\n", |
93 | | - "memory usage: 2.2 KB\n" |
| 100 | + "memory usage: 1.8 KB\n" |
94 | 101 | ] |
95 | 102 | } |
96 | 103 | ], |
|
100 | 107 | }, |
101 | 108 | { |
102 | 109 | "cell_type": "code", |
103 | | - "execution_count": 18, |
| 110 | + "execution_count": 6, |
104 | 111 | "metadata": {}, |
105 | 112 | "outputs": [ |
106 | 113 | { |
|
191 | 198 | "4 1 0.0 2012-10-02 14:00:00 37.500000 M A" |
192 | 199 | ] |
193 | 200 | }, |
194 | | - "execution_count": 18, |
| 201 | + "execution_count": 6, |
195 | 202 | "metadata": {}, |
196 | 203 | "output_type": "execute_result" |
197 | 204 | } |
|
216 | 223 | }, |
217 | 224 | { |
218 | 225 | "cell_type": "code", |
219 | | - "execution_count": 19, |
| 226 | + "execution_count": 7, |
220 | 227 | "metadata": {}, |
221 | 228 | "outputs": [], |
222 | 229 | "source": [ |
|
225 | 232 | }, |
226 | 233 | { |
227 | 234 | "cell_type": "code", |
228 | | - "execution_count": 20, |
| 235 | + "execution_count": 8, |
229 | 236 | "metadata": {}, |
230 | 237 | "outputs": [ |
231 | 238 | { |
|
274 | 281 | " 34 (condition, 8) 7 non-null category\n", |
275 | 282 | " 35 (condition, 9) 7 non-null category\n", |
276 | 283 | "dtypes: category(18), float32(18)\n", |
277 | | - "memory usage: 1.7 KB\n" |
| 284 | + "memory usage: 1.9 KB\n" |
278 | 285 | ] |
279 | 286 | } |
280 | 287 | ], |
|
298 | 305 | }, |
299 | 306 | { |
300 | 307 | "cell_type": "code", |
301 | | - "execution_count": 24, |
| 308 | + "execution_count": 9, |
302 | 309 | "metadata": {}, |
303 | 310 | "outputs": [], |
304 | 311 | "source": [ |
|
308 | 315 | }, |
309 | 316 | { |
310 | 317 | "cell_type": "code", |
311 | | - "execution_count": 25, |
| 318 | + "execution_count": 10, |
312 | 319 | "metadata": {}, |
313 | 320 | "outputs": [ |
314 | 321 | { |
|
358 | 365 | "cell_type": "markdown", |
359 | 366 | "metadata": {}, |
360 | 367 | "source": [ |
361 | | - "The `pivot_table` method on the other hand will only take the numerical columns into account." |
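| 368 | + "The `pivot_table` method, on the other hand, aggregates the values, and its default aggregation (the mean) is only defined for numerical columns. Since this dataframe contains categorical data as well, the numerical columns have to be selected explicitly using the `values` argument." |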
362 | 369 | ] |
363 | 370 | }, |
364 | 371 | { |
365 | 372 | "cell_type": "code", |
366 | | - "execution_count": 21, |
| 373 | + "execution_count": 11, |
367 | 374 | "metadata": {}, |
368 | 375 | "outputs": [], |
369 | 376 | "source": [ |
370 | | - "time_series_table = data.pivot_table(index='date', columns='patient')" |
| 377 | + "time_series_table = data.pivot_table(index='date', columns='patient', values=['dose', 'temperature'])" |
371 | 378 | ] |
372 | 379 | }, |
373 | 380 | { |
|
379 | 386 | }, |
380 | 387 | { |
381 | 388 | "cell_type": "code", |
382 | | - "execution_count": 22, |
| 389 | + "execution_count": 12, |
383 | 390 | "metadata": {}, |
384 | 391 | "outputs": [ |
385 | 392 | { |
|
427 | 434 | }, |
428 | 435 | { |
429 | 436 | "cell_type": "code", |
430 | | - "execution_count": 86, |
| 437 | + "execution_count": 13, |
431 | 438 | "metadata": {}, |
432 | 439 | "outputs": [], |
433 | 440 | "source": [ |
434 | 441 | "dose_table = data.pivot_table(index='date',\n", |
435 | 442 | " values=['dose'],\n", |
436 | 443 | " columns='patient',\n", |
437 | | - " aggfunc=np.sum,\n", |
| 444 | + " aggfunc='sum',\n", |
438 | 445 | " margins=True,)" |
439 | 446 | ] |
440 | 447 | }, |
441 | 448 | { |
442 | 449 | "cell_type": "code", |
443 | | - "execution_count": 52, |
| 450 | + "execution_count": 14, |
444 | 451 | "metadata": {}, |
445 | 452 | "outputs": [ |
446 | 453 | { |
|
620 | 627 | "All 6.0 15.0 13.0 10.0 27.0 8.0 30.0 0.0 30.0 139.0" |
621 | 628 | ] |
622 | 629 | }, |
623 | | - "execution_count": 52, |
| 630 | + "execution_count": 14, |
624 | 631 | "metadata": {}, |
625 | 632 | "output_type": "execute_result" |
626 | 633 | } |
|
645 | 652 | }, |
646 | 653 | { |
647 | 654 | "cell_type": "code", |
648 | | - "execution_count": 73, |
| 655 | + "execution_count": 15, |
649 | 656 | "metadata": {}, |
650 | 657 | "outputs": [ |
651 | 658 | { |
|
710 | 717 | " B 40.700001" |
711 | 718 | ] |
712 | 719 | }, |
713 | | - "execution_count": 73, |
| 720 | + "execution_count": 15, |
714 | 721 | "metadata": {}, |
715 | 722 | "output_type": "execute_result" |
716 | 723 | } |
717 | 724 | ], |
718 | 725 | "source": [ |
719 | 726 | "data.pivot_table(index=['gender', 'condition'],\n", |
720 | 727 | " values='temperature',\n", |
721 | | - " aggfunc=np.max,)" |
| 728 | + " aggfunc='max',)" |
722 | 729 | ] |
723 | 730 | }, |
724 | 731 | { |
|
730 | 737 | }, |
731 | 738 | { |
732 | 739 | "cell_type": "code", |
733 | | - "execution_count": 76, |
| 740 | + "execution_count": 16, |
734 | 741 | "metadata": {}, |
735 | 742 | "outputs": [ |
736 | 743 | { |
|
767 | 774 | " </thead>\n", |
768 | 775 | " <tbody>\n", |
769 | 776 | " <tr>\n", |
770 | | - " <th rowspan=\"3\" valign=\"top\">F</th>\n", |
| 777 | + " <th rowspan=\"9\" valign=\"top\">F</th>\n", |
| 778 | + " <th>1</th>\n", |
| 779 | + " <td>0.0</td>\n", |
| 780 | + " <td>NaN</td>\n", |
| 781 | + " </tr>\n", |
| 782 | + " <tr>\n", |
771 | 783 | " <th>2</th>\n", |
772 | 784 | " <td>15.0</td>\n", |
773 | 785 | " <td>39.400002</td>\n", |
774 | 786 | " </tr>\n", |
775 | 787 | " <tr>\n", |
| 788 | + " <th>3</th>\n", |
| 789 | + " <td>0.0</td>\n", |
| 790 | + " <td>NaN</td>\n", |
| 791 | + " </tr>\n", |
| 792 | + " <tr>\n", |
| 793 | + " <th>4</th>\n", |
| 794 | + " <td>0.0</td>\n", |
| 795 | + " <td>NaN</td>\n", |
| 796 | + " </tr>\n", |
| 797 | + " <tr>\n", |
| 798 | + " <th>5</th>\n", |
| 799 | + " <td>0.0</td>\n", |
| 800 | + " <td>NaN</td>\n", |
| 801 | + " </tr>\n", |
| 802 | + " <tr>\n", |
776 | 803 | " <th>6</th>\n", |
777 | 804 | " <td>8.0</td>\n", |
778 | 805 | " <td>38.099998</td>\n", |
779 | 806 | " </tr>\n", |
780 | 807 | " <tr>\n", |
| 808 | + " <th>7</th>\n", |
| 809 | + " <td>0.0</td>\n", |
| 810 | + " <td>NaN</td>\n", |
| 811 | + " </tr>\n", |
| 812 | + " <tr>\n", |
781 | 813 | " <th>8</th>\n", |
782 | 814 | " <td>0.0</td>\n", |
783 | 815 | " <td>37.900002</td>\n", |
784 | 816 | " </tr>\n", |
785 | 817 | " <tr>\n", |
786 | | - " <th rowspan=\"5\" valign=\"top\">M</th>\n", |
| 818 | + " <th>9</th>\n", |
| 819 | + " <td>0.0</td>\n", |
| 820 | + " <td>NaN</td>\n", |
| 821 | + " </tr>\n", |
| 822 | + " <tr>\n", |
| 823 | + " <th rowspan=\"9\" valign=\"top\">M</th>\n", |
787 | 824 | " <th>1</th>\n", |
788 | 825 | " <td>6.0</td>\n", |
789 | 826 | " <td>38.500000</td>\n", |
790 | 827 | " </tr>\n", |
791 | 828 | " <tr>\n", |
| 829 | + " <th>2</th>\n", |
| 830 | + " <td>0.0</td>\n", |
| 831 | + " <td>NaN</td>\n", |
| 832 | + " </tr>\n", |
| 833 | + " <tr>\n", |
792 | 834 | " <th>3</th>\n", |
793 | 835 | " <td>13.0</td>\n", |
794 | 836 | " <td>39.500000</td>\n", |
795 | 837 | " </tr>\n", |
796 | 838 | " <tr>\n", |
| 839 | + " <th>4</th>\n", |
| 840 | + " <td>0.0</td>\n", |
| 841 | + " <td>NaN</td>\n", |
| 842 | + " </tr>\n", |
| 843 | + " <tr>\n", |
797 | 844 | " <th>5</th>\n", |
798 | 845 | " <td>27.0</td>\n", |
799 | 846 | " <td>39.500000</td>\n", |
800 | 847 | " </tr>\n", |
801 | 848 | " <tr>\n", |
| 849 | + " <th>6</th>\n", |
| 850 | + " <td>0.0</td>\n", |
| 851 | + " <td>NaN</td>\n", |
| 852 | + " </tr>\n", |
| 853 | + " <tr>\n", |
802 | 854 | " <th>7</th>\n", |
803 | 855 | " <td>30.0</td>\n", |
804 | 856 | " <td>40.700001</td>\n", |
805 | 857 | " </tr>\n", |
806 | 858 | " <tr>\n", |
| 859 | + " <th>8</th>\n", |
| 860 | + " <td>0.0</td>\n", |
| 861 | + " <td>NaN</td>\n", |
| 862 | + " </tr>\n", |
| 863 | + " <tr>\n", |
807 | 864 | " <th>9</th>\n", |
808 | 865 | " <td>30.0</td>\n", |
809 | 866 | " <td>40.200001</td>\n", |
|
815 | 872 | "text/plain": [ |
816 | 873 | " dose temperature\n", |
817 | 874 | "gender patient \n", |
818 | | - "F 2 15.0 39.400002\n", |
| 875 | + "F 1 0.0 NaN\n", |
| 876 | + " 2 15.0 39.400002\n", |
| 877 | + " 3 0.0 NaN\n", |
| 878 | + " 4 0.0 NaN\n", |
| 879 | + " 5 0.0 NaN\n", |
819 | 880 | " 6 8.0 38.099998\n", |
| 881 | + " 7 0.0 NaN\n", |
820 | 882 | " 8 0.0 37.900002\n", |
| 883 | + " 9 0.0 NaN\n", |
821 | 884 | "M 1 6.0 38.500000\n", |
| 885 | + " 2 0.0 NaN\n", |
822 | 886 | " 3 13.0 39.500000\n", |
| 887 | + " 4 0.0 NaN\n", |
823 | 888 | " 5 27.0 39.500000\n", |
| 889 | + " 6 0.0 NaN\n", |
824 | 890 | " 7 30.0 40.700001\n", |
| 891 | + " 8 0.0 NaN\n", |
825 | 892 | " 9 30.0 40.200001" |
826 | 893 | ] |
827 | 894 | }, |
828 | | - "execution_count": 76, |
| 895 | + "execution_count": 16, |
829 | 896 | "metadata": {}, |
830 | 897 | "output_type": "execute_result" |
831 | 898 | } |
|
834 | 901 | "data.pivot_table(index=['gender', 'patient'],\n", |
835 | 902 | " values=['temperature', 'dose'],\n", |
836 | 903 | " aggfunc={\n", |
837 | | - " 'temperature': np.max,\n", |
838 | | - " 'dose': np.sum,\n", |
| 904 | + " 'temperature': 'max',\n", |
| 905 | + " 'dose': 'sum',\n", |
839 | 906 | " },)" |
840 | 907 | ] |
841 | 908 | } |
842 | 909 | ], |
843 | 910 | "metadata": { |
844 | 911 | "kernelspec": { |
845 | | - "display_name": "Python 3", |
| 912 | + "display_name": "Python 3 (ipykernel)", |
846 | 913 | "language": "python", |
847 | 914 | "name": "python3" |
848 | 915 | }, |
|
856 | 923 | "name": "python", |
857 | 924 | "nbconvert_exporter": "python", |
858 | 925 | "pygments_lexer": "ipython3", |
859 | | - "version": "3.7.6" |
| 926 | + "version": "3.12.0" |
860 | 927 | } |
861 | 928 | }, |
862 | 929 | "nbformat": 4, |
|
0 commit comments