GraphLab 是一个非常强大的解决机器学习(ML)问题的工具,在大规模数据处理方面具有很大的优势。最近在学习 Coursera 上的课程时用到了 GraphLab,在此做个总结。
import graphlab
读入文件
graphlab.SFrame('aaa.csv')
graphlab.SFrame('file_name')#Graphlab格式的文件
在 IPython Notebook 内嵌显示图表(以散点图为例)
graphlab.canvas.set_target('ipynb')
sales.show(view="Scatter Plot", x="sqft_living", y="price")
数据集划分
train_data,test_data = sales.random_split(.8,seed=0)
ML方法
训练与预测(training & prediction)
sqft_model = graphlab.linear_regression.create(train_data, target='price', features=['sqft_living'],validation_set=None)
sqft_model.predict(house2)
sentiment_model = graphlab.logistic_classifier.create(train_data,
target='sentiment',
features=['word_count'],
validation_set=test_data)
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(giraffe_reviews, output_type='probability')
评估与查看模型系数
sqft_model.evaluate(test_data)
sqft_model.get('coefficients')
sqft_model['coefficients']
常用操作
#按条件筛选行(布尔索引)
products = products[products['rating'] != 3]
#添加新列
products['sentiment'] = products['rating'] >=4
#排序
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)