|
4 | 4 | "cell_type": "markdown", |
5 | 5 | "metadata": {}, |
6 | 6 | "source": [ |
7 | | - "# Missing data" |
| 7 | + "# Requirements" |
8 | 8 | ] |
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | | - "execution_count": 9, |
| 12 | + "execution_count": 2, |
13 | 13 | "metadata": {}, |
14 | 14 | "outputs": [], |
15 | 15 | "source": [ |
|
28 | 28 | "cell_type": "markdown", |
29 | 29 | "metadata": {}, |
30 | 30 | "source": [ |
31 | | - "## Representing missing values" |
| 31 | + "# Representing missing values" |
32 | 32 | ] |
33 | 33 | }, |
34 | 34 | { |
|
40 | 40 | }, |
41 | 41 | { |
42 | 42 | "cell_type": "code", |
43 | | - "execution_count": 32, |
| 43 | + "execution_count": 3, |
44 | 44 | "metadata": {}, |
45 | 45 | "outputs": [], |
46 | 46 | "source": [ |
|
49 | 49 | }, |
50 | 50 | { |
51 | 51 | "cell_type": "code", |
52 | | - "execution_count": 16, |
| 52 | + "execution_count": 4, |
53 | 53 | "metadata": {}, |
54 | 54 | "outputs": [ |
55 | 55 | { |
|
90 | 90 | }, |
91 | 91 | { |
92 | 92 | "cell_type": "code", |
93 | | - "execution_count": 24, |
| 93 | + "execution_count": 5, |
94 | 94 | "metadata": {}, |
95 | 95 | "outputs": [], |
96 | 96 | "source": [ |
|
103 | 103 | }, |
104 | 104 | { |
105 | 105 | "cell_type": "code", |
106 | | - "execution_count": 25, |
| 106 | + "execution_count": 6, |
107 | 107 | "metadata": {}, |
108 | 108 | "outputs": [ |
109 | 109 | { |
|
130 | 130 | }, |
131 | 131 | { |
132 | 132 | "cell_type": "code", |
133 | | - "execution_count": 26, |
| 133 | + "execution_count": 7, |
134 | 134 | "metadata": {}, |
135 | 135 | "outputs": [ |
136 | 136 | { |
|
249 | 249 | "9 37 7.7 B <NA>" |
250 | 250 | ] |
251 | 251 | }, |
252 | | - "execution_count": 26, |
| 252 | + "execution_count": 7, |
253 | 253 | "metadata": {}, |
254 | 254 | "output_type": "execute_result" |
255 | 255 | } |
|
269 | 269 | "cell_type": "markdown", |
270 | 270 | "metadata": {}, |
271 | 271 | "source": [ |
272 | | - "## Computing and missing values" |
| 272 | + "# Computing and missing values" |
273 | 273 | ] |
274 | 274 | }, |
275 | 275 | { |
|
281 | 281 | }, |
282 | 282 | { |
283 | 283 | "cell_type": "code", |
284 | | - "execution_count": 27, |
| 284 | + "execution_count": 8, |
285 | 285 | "metadata": {}, |
286 | 286 | "outputs": [ |
287 | 287 | { |
|
290 | 290 | "165" |
291 | 291 | ] |
292 | 292 | }, |
293 | | - "execution_count": 27, |
| 293 | + "execution_count": 8, |
294 | 294 | "metadata": {}, |
295 | 295 | "output_type": "execute_result" |
296 | 296 | } |
|
301 | 301 | }, |
302 | 302 | { |
303 | 303 | "cell_type": "code", |
304 | | - "execution_count": 28, |
| 304 | + "execution_count": 9, |
305 | 305 | "metadata": {}, |
306 | 306 | "outputs": [ |
307 | 307 | { |
|
310 | 310 | "49.3" |
311 | 311 | ] |
312 | 312 | }, |
313 | | - "execution_count": 28, |
| 313 | + "execution_count": 9, |
314 | 314 | "metadata": {}, |
315 | 315 | "output_type": "execute_result" |
316 | 316 | } |
|
321 | 321 | }, |
322 | 322 | { |
323 | 323 | "cell_type": "code", |
324 | | - "execution_count": 29, |
| 324 | + "execution_count": 10, |
325 | 325 | "metadata": {}, |
326 | 326 | "outputs": [ |
327 | 327 | { |
|
406 | 406 | "max 37.000000 7.700000" |
407 | 407 | ] |
408 | 408 | }, |
409 | | - "execution_count": 29, |
| 409 | + "execution_count": 10, |
410 | 410 | "metadata": {}, |
411 | 411 | "output_type": "execute_result" |
412 | 412 | } |
|
424 | 424 | }, |
425 | 425 | { |
426 | 426 | "cell_type": "code", |
427 | | - "execution_count": 31, |
| 427 | + "execution_count": 11, |
428 | 428 | "metadata": {}, |
429 | 429 | "outputs": [ |
430 | 430 | { |
|
466 | 466 | " <tr>\n", |
467 | 467 | " <th>top</th>\n", |
468 | 468 | " <td>A</td>\n", |
469 | | - " <td>str1_str1</td>\n", |
| 469 | + " <td>str1</td>\n", |
470 | 470 | " </tr>\n", |
471 | 471 | " <tr>\n", |
472 | 472 | " <th>freq</th>\n", |
|
481 | 481 | " category_data string_data\n", |
482 | 482 | "count 9 9\n", |
483 | 483 | "unique 2 9\n", |
484 | | - "top A str1_str1\n", |
| 484 | + "top A str1\n", |
485 | 485 | "freq 5 1" |
486 | 486 | ] |
487 | 487 | }, |
488 | | - "execution_count": 31, |
| 488 | + "execution_count": 11, |
489 | 489 | "metadata": {}, |
490 | 490 | "output_type": "execute_result" |
491 | 491 | } |
|
496 | 496 | }, |
497 | 497 | { |
498 | 498 | "cell_type": "code", |
499 | | - "execution_count": 39, |
| 499 | + "execution_count": 12, |
500 | 500 | "metadata": {}, |
501 | 501 | "outputs": [ |
502 | 502 | { |
|
547 | 547 | "B 3" |
548 | 548 | ] |
549 | 549 | }, |
550 | | - "execution_count": 39, |
| 550 | + "execution_count": 12, |
551 | 551 | "metadata": {}, |
552 | 552 | "output_type": "execute_result" |
553 | 553 | } |
|
558 | 558 | " .count()" |
559 | 559 | ] |
560 | 560 | }, |
| 561 | + { |
| 562 | + "cell_type": "markdown", |
| 563 | + "metadata": {}, |
| 564 | + "source": [ |
| 565 | + "# Selecting rows with missing data" |
| 566 | + ] |
| 567 | + }, |
561 | 568 | { |
562 | 569 | "cell_type": "code", |
563 | | - "execution_count": null, |
| 570 | + "execution_count": 16, |
564 | 571 | "metadata": {}, |
565 | | - "outputs": [], |
566 | | - "source": [] |
| 572 | + "outputs": [ |
| 573 | + { |
| 574 | + "data": { |
| 575 | + "text/html": [ |
| 576 | + "<div>\n", |
| 577 | + "<style scoped>\n", |
| 578 | + " .dataframe tbody tr th:only-of-type {\n", |
| 579 | + " vertical-align: middle;\n", |
| 580 | + " }\n", |
| 581 | + "\n", |
| 582 | + " .dataframe tbody tr th {\n", |
| 583 | + " vertical-align: top;\n", |
| 584 | + " }\n", |
| 585 | + "\n", |
| 586 | + " .dataframe thead th {\n", |
| 587 | + " text-align: right;\n", |
| 588 | + " }\n", |
| 589 | + "</style>\n", |
| 590 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 591 | + " <thead>\n", |
| 592 | + " <tr style=\"text-align: right;\">\n", |
| 593 | + " <th></th>\n", |
| 594 | + " <th>int_data</th>\n", |
| 595 | + " <th>float_data</th>\n", |
| 596 | + " <th>category_data</th>\n", |
| 597 | + " <th>string_data</th>\n", |
| 598 | + " </tr>\n", |
| 599 | + " </thead>\n", |
| 600 | + " <tbody>\n", |
| 601 | + " <tr>\n", |
| 602 | + " <th>6</th>\n", |
| 603 | + " <td><NA></td>\n", |
| 604 | + " <td>5.5</td>\n", |
| 605 | + " <td>A</td>\n", |
| 606 | + " <td>str3</td>\n", |
| 607 | + " </tr>\n", |
| 608 | + " <tr>\n", |
| 609 | + " <th>7</th>\n", |
| 610 | + " <td>29</td>\n", |
| 611 | + " <td>NaN</td>\n", |
| 612 | + " <td>B</td>\n", |
| 613 | + " <td>str3_str1</td>\n", |
| 614 | + " </tr>\n", |
| 615 | + " <tr>\n", |
| 616 | + " <th>8</th>\n", |
| 617 | + " <td>31</td>\n", |
| 618 | + " <td>3.3</td>\n", |
| 619 | + " <td>NaN</td>\n", |
| 620 | + " <td>str2_str3</td>\n", |
| 621 | + " </tr>\n", |
| 622 | + " <tr>\n", |
| 623 | + " <th>9</th>\n", |
| 624 | + " <td>37</td>\n", |
| 625 | + " <td>7.7</td>\n", |
| 626 | + " <td>B</td>\n", |
| 627 | + " <td><NA></td>\n", |
| 628 | + " </tr>\n", |
| 629 | + " </tbody>\n", |
| 630 | + "</table>\n", |
| 631 | + "</div>" |
| 632 | + ], |
| 633 | + "text/plain": [ |
| 634 | + " int_data float_data category_data string_data\n", |
| 635 | + "6 <NA> 5.5 A str3\n", |
| 636 | + "7 29 NaN B str3_str1\n", |
| 637 | + "8 31 3.3 NaN str2_str3\n", |
| 638 | + "9 37 7.7 B <NA>" |
| 639 | + ] |
| 640 | + }, |
| 641 | + "execution_count": 16, |
| 642 | + "metadata": {}, |
| 643 | + "output_type": "execute_result" |
| 644 | + } |
| 645 | + ], |
| 646 | + "source": [ |
| 647 | + "data2[data2.isnull().any(axis=1)]" |
| 648 | + ] |
567 | 649 | } |
568 | 650 | ], |
569 | 651 | "metadata": { |
|
582 | 664 | "name": "python", |
583 | 665 | "nbconvert_exporter": "python", |
584 | 666 | "pygments_lexer": "ipython3", |
585 | | - "version": "3.7.6" |
586 | | - } |
| 667 | + "version": "3.7.7" |
| 668 | + }, |
| 669 | + "toc-autonumbering": true |
587 | 670 | }, |
588 | 671 | "nbformat": 4, |
589 | 672 | "nbformat_minor": 4 |
|
0 commit comments