MLflow: the open-source platform for the machine learning lifecycle, 管理machine learning整个生命周期的一款开源产品,主要提供了三种服务:
几乎支持市面上的所有Machine Learning frameworks, TensorFlow/PyTorch/Spark/SKlearn/R…
开源,并有着Databricks/Microsoft等一众公司的committer.
Seldon: the open-source platform to help deploy machine learning models, 主要focus在model的deployment
import os
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
import mlflow
import mlflow.sklearn
def eval_metrics(actual, pred):
    """Score a set of predictions against the observed values.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    scores = (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )
    return scores
if __name__ == "__main__":
    # Suppress library warnings and seed NumPy's global random generator.
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # The wine-quality CSV is resolved relative to this script's own directory,
    # so the current working directory does not matter.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    wine_path = os.path.join(script_dir, "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split into training and test sets using train_test_split's defaults.
    train, test = train_test_split(data)

    # "quality" is the column being predicted; all remaining columns are features.
    target_column = ["quality"]
    train_x, test_x = (frame.drop(target_column, axis=1) for frame in (train, test))
    train_y, test_y = (frame[target_column] for frame in (train, test))

    # Optional command-line overrides: first argument is alpha, second is
    # l1_ratio. Each falls back to 0.5 when not supplied.
    cli_args = sys.argv[1:]
    alpha = float(cli_args[0]) if cli_args else 0.5
    l1_ratio = float(cli_args[1]) if len(cli_args) >= 2 else 0.5

    mlflow.set_experiment("test")

    with mlflow.start_run():
        # Fit the model and score it on the held-out rows.
        regressor = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        regressor.fit(train_x, train_y)
        predictions = regressor.predict(test_x)
        rmse, mae, r2 = eval_metrics(test_y, predictions)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print(" RMSE: %s" % rmse)
        print(" MAE: %s" % mae)
        print(" R2: %s" % r2)

        # Record hyperparameters, then metrics, then the fitted model on the
        # active run (same order as the calls were originally issued).
        for param_name, param_value in (("alpha", alpha), ("l1_ratio", l1_ratio)):
            mlflow.log_param(param_name, param_value)
        for metric_name, metric_value in (("rmse", rmse), ("r2", r2), ("mae", mae)):
            mlflow.log_metric(metric_name, metric_value)
        mlflow.sklearn.log_model(regressor, "model")
在对应的Model Storage下,可以看到MLmodel文件。这个文件内包含了很多信息:模型本身(model.pkl)、模型产生的env(conda.yaml)…… 之后Seldon会读取这部分信息去做deploy。
# MLmodel descriptor found next to the logged model in the model storage.
artifact_path: model
# One entry per flavor the model was saved with.
flavors:
  # Generic Python-function flavor.
  python_function:
    data: model.pkl
    env: conda.yaml
    loader_module: mlflow.sklearn
    python_version: 3.6.5
  # Native scikit-learn flavor.
  sklearn:
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 0.21.3
run_id: 26f04f36493b4982a064bb8d6e9d9b30
Prerequisites:
# Download the Helm installer script into get_helm.sh, make it executable, and run it.
# NOTE(review): mode 777 is broader than needed just to execute the script — u+x would suffice.
curl https://raw.githubusercontent.com/helm/helm/master/scripts/get > get_helm.sh
chmod 777 get_helm.sh
./get_helm.sh
helm init #install Tiller, a deployment/service/pod of Tiller will be installed automatically in **kube-system** NS
#### create account for Tiller
# Create the "tiller" service account in kube-system, bind it to the cluster-admin
# cluster role, then patch the tiller-deploy deployment to run under that account.
kubectl create serviceaccount --namespace kube-system tiller
kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
# Install the seldon-core-operator chart as release "seldon-core" in the
# seldon-system namespace, with usage metrics and ambassador support switched on.
helm install \
seldon-core-operator \
--name seldon-core \
--repo https://storage.googleapis.com/seldon-charts \
--namespace seldon-system \
--set usagemetrics.enabled=true \
--set ambassador.enabled=true
# Install the stable/ambassador chart as release "ambassador", then wait for
# its deployment rollout to complete.
helm install stable/ambassador --name ambassador --set crds.keep=false
kubectl rollout status deployment.apps/ambassador
Port forwarding:
#### run below command in another terminal
# Forward local port 8003 to port 8080 of the first pod labelled
# app.kubernetes.io/name=ambassador.
kubectl port-forward $(kubectl get pods -l app.kubernetes.io/name=ambassador -o jsonpath='{.items[0].metadata.name}') 8003:8080
# install Seldon Analytics with prometheus and grafana
# NOTE(review): the Grafana admin password is set to the literal "password" here — change it outside a demo.
helm install seldon-core-analytics --name seldon-core-analytics \
--repo https://storage.googleapis.com/seldon-charts \
--set grafana_prom_admin_password=password \
--set persistence.enabled=false
#### run below command in another terminal
# Forward local port 3000 to port 3000 of the first pod labelled
# app=grafana-prom-server.
kubectl port-forward \
$(kubectl get pods \
-l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') \
3000:3000
# SeldonDeployment that serves the MLflow-logged model.
apiVersion: machinelearning.seldon.io/v1alpha2
kind: SeldonDeployment
metadata:
  name: test
spec:
  name: rex
  predictors:
    - graph:
        children: []
        # Use Seldon's prepackaged MLflow server implementation.
        implementation: MLFLOW_SERVER
        # Location of the logged model directory (the one holding the MLmodel file).
        modelUri: s3://mlflow/xxx/artifacts/model
        # Name of the secret referenced for accessing the s3:// URI.
        envSecretRefName: s3-secret
        name: classifier
      name: default
      replicas: 1