搭建请参考上一篇文章。
export PYTHON_ROOT=/data/Python
export PYSPARK_PYTHON=${PYTHON_ROOT}/bin/python
export SPARK_YARN_USER_ENV="PYSPARK_PYTHON=Python/bin/python"
遇到权限问题是普遍现象,层层排查
hdfs dfs -chmod 777 /user/hdfs
hdfs dfs -ls /user/hdfs
hdfs dfs -mkdir /user/hdfs/mnist_model
chown -R hdfs:hdfs /data/TensorflowOnSpark
因为输出目录由 YARN 创建,所以需要确保该路径具有执行以及读写权限
spark-submit --master yarn --deploy-mode cluster --num-executors 3 --executor-memory 2g \
--queue default \
--py-files TensorFlowOnSpark/tfspark.zip,TensorFlowOnSpark/examples/mnist/tf/mnist_dist.py \
--conf spark.dynamicAllocation.enabled=false --conf spark.yarn.maxAppAttempts=1 \
--archives hdfs:///user/${USER}/Python.zip#Python \
--conf spark.executorEnv.LD_LIBRARY_PATH="/data/cloudera/parcel