"""Titanic passenger survival case study: basic pandas computations.

Walks through missing-value handling, per-class aggregation, pivot tables,
dropping rows/columns with NaN, sorting and re-indexing, and
``DataFrame.apply`` with custom functions.
"""
import pandas as pd
import numpy as np  # no longer referenced below; retained so the file's imports are unchanged

# Raw string: the value is identical to the original literal, but "\P", "\k"
# and "\p" are no longer parsed as (invalid) escape sequences.
DATA_PATH = r"E:\Python\ku\pandas/titanic_train.csv"

# Human-readable label for each known passenger class code.
_CLASS_NAMES = {1: "First Class", 2: "Second Class", 3: "Third Class"}


def hundredth_row(column):
    """Return the item labelled 99 (the 100th row of a default index).

    Passed to ``DataFrame.apply`` with the default axis, it is called once
    per column and yields that column's value at index label 99.
    """
    return column.loc[99]


def not_null_count(column):
    """Return how many values in ``column`` are null.

    Despite its name, this counts the *null* entries. The count is taken
    from ``column`` itself, so it does not depend on any global DataFrame.
    """
    return int(pd.isnull(column).sum())


def which_class(column):
    """Translate the ``"Pclass"`` entry of one row into a readable label.

    ``column`` is a single row when used with ``apply(..., axis=1)``; the
    parameter name is kept for backward compatibility. Returns "Unknown"
    for a null class and ``None`` for any code other than 1, 2 or 3.
    """
    pclass = column["Pclass"]
    if pd.isnull(pclass):
        return "Unknown"
    return _CLASS_NAMES.get(pclass)


def main(csv_path=DATA_PATH):
    """Run the whole walkthrough against the CSV at ``csv_path``."""
    titanic_train = pd.read_csv(csv_path)
    print(titanic_train.columns)

    age = titanic_train["Age"]
    print(age.loc[0:10])  # contains NaN

    age_is_null = pd.isnull(age)
    print(age_is_null)

    # Print every null value.
    age_null_true = age[age_is_null]
    print("所有的空值:\n", age_null_true)

    # Total number of null values.
    age_null_count = len(age_null_true)
    print("总共的空值:", age_null_count)

    # Naive mean: deliberately left as-is to show that a single NaN
    # propagates through sum() and makes the result NaN.
    mean_age = sum(age) / len(age)
    print("平均值:", mean_age)

    # Getting rid of NaN.
    # Method 1: filter the nulls out before averaging.
    good_ages = age[~age_is_null]
    correct_mean_age = sum(good_ages) / len(good_ages)
    print("处理掉NAN后的平均值:", correct_mean_age)

    # Method 2: Series.mean() skips NaN by default.
    correct_mean_age = age.mean()
    print("处理掉NAN后的平均值(方法二):", correct_mean_age)

    # Method 3: fill NaN with the mean, median or mode (not shown here).

    # Average fare per cabin class: for each class, select the rows whose
    # Pclass matches, take their Fare column and store its mean in a dict.
    passenger_classes = [1, 2, 3]
    fares_by_class = {
        this_class: titanic_train.loc[
            titanic_train["Pclass"] == this_class, "Fare"
        ].mean()
        for this_class in passenger_classes
    }
    print(fares_by_class)

    # Survival rate per class, the short way.
    # pivot_table(index=<group by>, values=<columns to aggregate>,
    #             aggfunc=<aggregation>); aggfunc defaults to the mean.
    # index is like SQL GROUP BY, values picks columns from each group,
    # and aggfunc is the computation applied to those columns.
    # The string "mean" replaces np.mean, which recent pandas deprecates
    # as an aggfunc; the computed result is the same.
    passenger_survival = titanic_train.pivot_table(
        index="Pclass", values="Survived", aggfunc="mean"
    )
    print(passenger_survival)

    # Average age per class (default aggfunc).
    passenger_age = titanic_train.pivot_table(index="Pclass", values="Age")
    print(passenger_age)

    # One key against several values: embarkation port versus total fare
    # and number of survivors.
    port_stats = titanic_train.pivot_table(
        index="Embarked", values=["Fare", "Survived"], aggfunc="sum"
    )
    print(port_stats)

    # Dropping missing values (demonstration only; results are not used).
    # axis=1 drops every column that has any null -- rarely what you want.
    drop_na_columns = titanic_train.dropna(axis=1)
    # axis=0 with subset drops rows that have a null in Age or Sex.
    rows_with_age_and_sex = titanic_train.dropna(axis=0, subset=["Age", "Sex"])

    # Look up a single cell by row label and column name.
    row_index_83_age = titanic_train.loc[83, "Age"]
    print(row_index_83_age)

    # Sort by Age (descending), then renumber the rows.
    new_titanic_train = titanic_train.sort_values("Age", ascending=False)
    print(new_titanic_train)
    # drop=True discards the old index instead of keeping it as a column.
    titanic_reindexed = new_titanic_train.reset_index(drop=True)
    print(titanic_reindexed.loc[0:10])

    # Custom functions with apply().
    # The result gets its own name so the function is not overwritten.
    hundredth_values = titanic_train.apply(hundredth_row)
    print(hundredth_values)

    # Number of null values in every column.
    column_null_count = titanic_train.apply(not_null_count)
    print(column_null_count)

    # Translate the values of a column, row by row.
    # axis=0 matches the index (operations run top to bottom);
    # axis=1 matches the columns (operations run left to right).
    # In general, with axis=i NumPy operates along the direction in which
    # the i-th subscript changes. For data = [[a00, a01], [a10, a11]],
    # axis=0 runs a00 -> a10 and a01 -> a11 (the vertical direction);
    # axis=1 is analogous along the horizontal direction.
    classes = titanic_train.apply(which_class, axis=1)
    print(classes)


if __name__ == "__main__":
    main()
打印结果:
E:\Python\venv\Scripts\python.exe E:/Python/ku/pandas/2.计算.py
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
0 22.0
1 38.0
2 26.0
3 35.0
4 35.0
5 NaN
6 54.0
7 2.0
8 27.0
9 14.0
10 4.0
Name: Age, dtype: float64
0 False
1 False
2 False
3 False
4 False
5 True
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
14 False
15 False
16 False
17 True
18 False
19 True
20 False
21 False
22 False
23 False
24 False
25 False
26 True
27 False
28 True
29 True
...
861 False
862 False
863 True
864 False
865 False
866 False
867 False
868 True
869 False
870 False
871 False
872 False
873 False
874 False
875 False
876 False
877 False
878 True
879 False
880 False
881 False
882 False
883 False
884 False
885 False
886 False
887 False
888 True
889 False
890 False
Name: Age, Length: 891, dtype: bool
所有的空值:
5 NaN
17 NaN
19 NaN
26 NaN
28 NaN
29 NaN
31 NaN
32 NaN
36 NaN
42 NaN
45 NaN
46 NaN
47 NaN
48 NaN
55 NaN
64 NaN
65 NaN
76 NaN
77 NaN
82 NaN
87 NaN
95 NaN
101 NaN
107 NaN
109 NaN
121 NaN
126 NaN
128 NaN
140 NaN
154 NaN
..
718 NaN
727 NaN
732 NaN
738 NaN
739 NaN
740 NaN
760 NaN
766 NaN
768 NaN
773 NaN
776 NaN
778 NaN
783 NaN
790 NaN
792 NaN
793 NaN
815 NaN
825 NaN
826 NaN
828 NaN
832 NaN
837 NaN
839 NaN
846 NaN
849 NaN
859 NaN
863 NaN
868 NaN
878 NaN
888 NaN
Name: Age, Length: 177, dtype: float64
总共的空值: 177
平均值: nan
处理掉NAN后的平均值: 29.69911764705882
处理掉NAN后的平均值(方法二): 29.6991176471
{1: 84.154687499999994, 2: 20.662183152173913, 3: 13.675550101832993}
Survived
Pclass
1 0.629630
2 0.472826
3 0.242363
Age
Pclass
1 38.233441
2 29.877630
3 25.140620
Fare Survived
Embarked
C 10072.2962 93
Q 1022.2543 30
S 17439.3988 217
28.0
PassengerId Survived Pclass \
630 631 1 1
851 852 0 3
493 494 0 1
96 97 0 1
116 117 0 3
672 673 0 2
745 746 0 1
33 34 0 2
54 55 0 1
280 281 0 3
456 457 0 1
438 439 0 1
545 546 0 1
275 276 1 1
483 484 1 3
570 571 1 2
252 253 0 1
829 830 1 1
555 556 0 1
625 626 0 1
326 327 0 3
170 171 0 1
684 685 0 2
694 695 0 1
587 588 1 1
366 367 1 1
94 95 0 3
232 233 0 2
268 269 1 1
11 12 1 1
.. ... ... ...
718 719 0 3
727 728 1 3
732 733 0 2
738 739 0 3
739 740 0 3
740 741 1 1
760 761 0 3
766 767 0 1
768 769 0 3
773 774 0 3
776 777 0 3
778 779 0 3
783 784 0 3
790 791 0 3
792 793 0 3
793 794 0 1
815 816 0 1
825 826 0 3
826 827 0 3
828 829 1 3
832 833 0 3
837 838 0 3
839 840 1 1
846 847 0 3
849 850 1 1
859 860 0 3
863 864 0 3
868 869 0 3
878 879 0 3
888 889 0 3
Name Sex Age SibSp \
630 Barkworth, Mr. Algernon Henry Wilson male 80.0 0
851 Svensson, Mr. Johan male 74.0 0
493 Artagaveytia, Mr. Ramon male 71.0 0
96 Goldschmidt, Mr. George B male 71.0 0
116 Connors, Mr. Patrick male 70.5 0
672 Mitchell, Mr. Henry Michael male 70.0 0
745 Crosby, Capt. Edward Gifford male 70.0 1
33 Wheadon, Mr. Edward H male 66.0 0
54 Ostby, Mr. Engelhart Cornelius male 65.0 0
280 Duane, Mr. Frank male 65.0 0
456 Millet, Mr. Francis Davis male 65.0 0
438 Fortune, Mr. Mark male 64.0 1
545 Nicholson, Mr. Arthur Ernest male 64.0 0
275 Andrews, Miss. Kornelia Theodosia female 63.0 1
483 Turkula, Mrs. (Hedwig) female 63.0 0
570 Harris, Mr. George male 62.0 0
252 Stead, Mr. William Thomas male 62.0 0
829 Stone, Mrs. George Nelson (Martha Evelyn) female 62.0 0
555 Wright, Mr. George male 62.0 0
625 Sutton, Mr. Frederick male 61.0 0
326 Nysveen, Mr. Johan Hansen male 61.0 0
170 Van der hoef, Mr. Wyckoff male 61.0 0
684 Brown, Mr. Thomas William Solomon male 60.0 1
694 Weir, Col. John male 60.0 0
587 Frolicher-Stehli, Mr. Maxmillian male 60.0 1
366 Warren, Mrs. Frank Manley (Anna Sophia Atkinson) female 60.0 1
94 Coxon, Mr. Daniel male 59.0 0
232 Sjostedt, Mr. Ernst Adolf male 59.0 0
268 Graham, Mrs. William Thompson (Edith Junkins) female 58.0 0
11 Bonnell, Miss. Elizabeth female 58.0 0
.. ... ... ... ...
718 McEvoy, Mr. Michael male NaN 0
727 Mannion, Miss. Margareth female NaN 0
732 Knight, Mr. Robert J male NaN 0
738 Ivanoff, Mr. Kanio male NaN 0
739 Nankoff, Mr. Minko male NaN 0
740 Hawksford, Mr. Walter James male NaN 0
760 Garfirth, Mr. John male NaN 0
766 Brewe, Dr. Arthur Jackson male NaN 0
768 Moran, Mr. Daniel J male NaN 1
773 Elias, Mr. Dibo male NaN 0
776 Tobin, Mr. Roger male NaN 0
778 Kilgannon, Mr. Thomas J male NaN 0
783 Johnston, Mr. Andrew G male NaN 1
790 Keane, Mr. Andrew "Andy" male NaN 0
792 Sage, Miss. Stella Anna female NaN 8
793 Hoyt, Mr. William Fisher male NaN 0
815 Fry, Mr. Richard male NaN 0
825 Flynn, Mr. John male NaN 0
826 Lam, Mr. Len male NaN 0
828 McCormack, Mr. Thomas Joseph male NaN 0
832 Saad, Mr. Amin male NaN 0
837 Sirota, Mr. Maurice male NaN 0
839 Marechal, Mr. Pierre male NaN 0
846 Sage, Mr. Douglas Bullen male NaN 8
849 Goldenberg, Mrs. Samuel L (Edwiga Grabowska) female NaN 1
859 Razi, Mr. Raihed male NaN 0
863 Sage, Miss. Dorothy Edith "Dolly" female NaN 8
868 van Melkebeke, Mr. Philemon male NaN 0
878 Laleff, Mr. Kristo male NaN 0
888 Johnston, Miss. Catherine Helen "Carrie" female NaN 1
Parch Ticket Fare Cabin Embarked
630 0 27042 30.0000 A23 S
851 0 347060 7.7750 NaN S
493 0 PC 17609 49.5042 NaN C
96 0 PC 17754 34.6542 A5 C
116 0 370369 7.7500 NaN Q
672 0 C.A. 24580 10.5000 NaN S
745 1 WE/P 5735 71.0000 B22 S
33 0 C.A. 24579 10.5000 NaN S
54 1 113509 61.9792 B30 C
280 0 336439 7.7500 NaN Q
456 0 13509 26.5500 E38 S
438 4 19950 263.0000 C23 C25 C27 S
545 0 693 26.0000 NaN S
275 0 13502 77.9583 D7 S
483 0 4134 9.5875 NaN S
570 0 S.W./PP 752 10.5000 NaN S
252 0 113514 26.5500 C87 S
829 0 113572 80.0000 B28 NaN
555 0 113807 26.5500 NaN S
625 0 36963 32.3208 D50 S
326 0 345364 6.2375 NaN S
170 0 111240 33.5000 B19 S
684 1 29750 39.0000 NaN S
694 0 113800 26.5500 NaN S
587 1 13567 79.2000 B41 C
366 0 110813 75.2500 D37 C
94 0 364500 7.2500 NaN S
232 0 237442 13.5000 NaN S
268 1 PC 17582 153.4625 C125 S
11 0 113783 26.5500 C103 S
.. ... ... ... ... ...
718 0 36568 15.5000 NaN Q
727 0 36866 7.7375 NaN Q
732 0 239855 0.0000 NaN S
738 0 349201 7.8958 NaN S
739 0 349218 7.8958 NaN S
740 0 16988 30.0000 D45 S
760 0 358585 14.5000 NaN S
766 0 112379 39.6000 NaN C
768 0 371110 24.1500 NaN Q
773 0 2674 7.2250 NaN C
776 0 383121 7.7500 F38 Q
778 0 36865 7.7375 NaN Q
783 2 W./C. 6607 23.4500 NaN S
790 0 12460 7.7500 NaN Q
792 2 CA. 2343 69.5500 NaN S
793 0 PC 17600 30.6958 NaN C
815 0 112058 0.0000 B102 S
825 0 368323 6.9500 NaN Q
826 0 1601 56.4958 NaN S
828 0 367228 7.7500 NaN Q
832 0 2671 7.2292 NaN C
837 0 392092 8.0500 NaN S
839 0 11774 29.7000 C47 C
846 2 CA. 2343 69.5500 NaN S
849 0 17453 89.1042 C92 C
859 0 2629 7.2292 NaN C
863 2 CA. 2343 69.5500 NaN S
868 0 345777 9.5000 NaN S
878 0 349217 7.8958 NaN S
888 2 W./C. 6607 23.4500 NaN S
[891 rows x 12 columns]
PassengerId Survived Pclass Name Sex \
0 631 1 1 Barkworth, Mr. Algernon Henry Wilson male
1 852 0 3 Svensson, Mr. Johan male
2 494 0 1 Artagaveytia, Mr. Ramon male
3 97 0 1 Goldschmidt, Mr. George B male
4 117 0 3 Connors, Mr. Patrick male
5 673 0 2 Mitchell, Mr. Henry Michael male
6 746 0 1 Crosby, Capt. Edward Gifford male
7 34 0 2 Wheadon, Mr. Edward H male
8 55 0 1 Ostby, Mr. Engelhart Cornelius male
9 281 0 3 Duane, Mr. Frank male
10 457 0 1 Millet, Mr. Francis Davis male
Age SibSp Parch Ticket Fare Cabin Embarked
0 80.0 0 0 27042 30.0000 A23 S
1 74.0 0 0 347060 7.7750 NaN S
2 71.0 0 0 PC 17609 49.5042 NaN C
3 71.0 0 0 PC 17754 34.6542 A5 C
4 70.5 0 0 370369 7.7500 NaN Q
5 70.0 0 0 C.A. 24580 10.5000 NaN S
6 70.0 1 1 WE/P 5735 71.0000 B22 S
7 66.0 0 0 C.A. 24579 10.5000 NaN S
8 65.0 0 1 113509 61.9792 B30 C
9 65.0 0 0 336439 7.7500 NaN Q
10 65.0 0 0 13509 26.5500 E38 S
PassengerId 100
Survived 0
Pclass 2
Name Kantor, Mr. Sinai
Sex male
Age 34
SibSp 1
Parch 0
Ticket 244367
Fare 26
Cabin NaN
Embarked S
dtype: object
PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
0 Third Class
1 First Class
2 Third Class
3 First Class
4 Third Class
5 Third Class
6 First Class
7 Third Class
8 Third Class
9 Second Class
10 Third Class
11 First Class
12 Third Class
13 Third Class
14 Third Class
15 Second Class
16 Third Class
17 Second Class
18 Third Class
19 Third Class
20 Second Class
21 Second Class
22 Third Class
23 First Class
24 Third Class
25 Third Class
26 Third Class
27 First Class
28 Third Class
29 Third Class
...
861 Second Class
862 First Class
863 Third Class
864 Second Class
865 Second Class
866 Second Class
867 First Class
868 Third Class
869 Third Class
870 Third Class
871 First Class
872 First Class
873 Third Class
874 Second Class
875 Third Class
876 Third Class
877 Third Class
878 Third Class
879 First Class
880 Second Class
881 Third Class
882 Third Class
883 Second Class
884 Third Class
885 Third Class
886 Second Class
887 First Class
888 Third Class
889 First Class
890 Third Class
Length: 891, dtype: object
Process finished with exit code 0