n:直接指定从原数据中抽取的行数。frac:指定从原数据中抽取的比例。n 与 frac 不能同时指定。
# Demo frame: 100 rows x 4 columns of standard-normal samples, with default
# integer row and column labels.
df = pd.DataFrame(np.random.randn(100, 4))
df.head()
0 1 2 3
0 0.014483 0.044465 0.850342 0.764259
1 0.967565 0.174887 0.519950 0.450956
2 -0.669433 -0.629411 -1.288723 0.119729
3 0.995928 0.530986 0.398481 2.161532
4 -1.581833 0.758884 -0.018676 -0.205574
# Draw an absolute number of rows (20) at random; fixing random_state makes
# the draw repeatable.
X_train1 = df.sample(n=20, random_state=0)
X_train1.head()
0 1 2 3
26 -0.034815 -0.427423 0.380800 0.365956
86 -1.556810 -2.761708 -0.290238 0.699914
2 -0.669433 -0.629411 -1.288723 0.119729
55 1.223293 0.506993 -0.466281 -0.375706
75 0.802527 -1.057052 0.749458 -0.191170
# Draw a fraction of the rows instead of an absolute count. With the same
# random_state, and frac=0.2 of 100 rows equal to n=20, the rows selected
# match X_train1.
X_train2 = df.sample(frac=0.2, random_state=0)
X_train2.head()
0 1 2 3
26 -0.034815 -0.427423 0.380800 0.365956
86 -1.556810 -2.761708 -0.290238 0.699914
2 -0.669433 -0.629411 -1.288723 0.119729
55 1.223293 0.506993 -0.466281 -0.375706
75 0.802527 -1.057052 0.749458 -0.191170
# Everything that was not sampled into X_train1 becomes the test set: drop
# the sampled row labels from the full frame.
X_test = df.drop(index=X_train1.index)
X_test.head()
0 1 2 3
0 0.014483 0.044465 0.850342 0.764259
1 0.967565 0.174887 0.519950 0.450956
3 0.995928 0.530986 0.398481 2.161532
4 -1.581833 0.758884 -0.018676 -0.205574
5 2.639475 -0.835314 -0.252094 -1.401924
from sklearn.model_selection import train_test_split
# Split features X and target y into train/test parts in one call, holding
# out 20% of the rows for testing. The fixed random_state makes the shuffled
# split reproducible.
# NOTE: this rebinds X_test, replacing the frame built with df.drop earlier.
# NOTE(review): X and y are defined outside this excerpt - presumably the
# first three columns of df and column 3; confirm against their definition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=20, shuffle=True
)
X_train.head()
0 1 2
37 0.709817 0.343003 -0.211449
51 0.109385 0.479072 0.817219
35 0.206000 -0.783487 -0.694645
65 1.697172 -0.728110 0.445701
31 -1.622200 -1.202743 -0.035591
# Preview the first rows of the training target; its index labels line up
# with the rows of X_train produced by the same split.
y_train.head()
37 0.069042
51 1.961325
35 -0.379156
65 -1.098712
31 -0.142196
Name: 3, dtype: float64