0.安装依赖包
#用pip安装python科学计算库numpy,sklearn,scipy
su -
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-8.noarch.rpm
yum install epel-release-7-8.noarch.rpm
yum install python2-pip.noarch
yum install gcc-c++.x86_64
pip install --upgrade pip
pip install numpy
pip install sklearn
pip install scipy
1.安装bazel
Bazel是一个类似于Make的工具,是Google为其内部软件开发的特点量身定制的工具,2015年开源。
cd ~
wget https://github.com/bazelbuild/bazel/archive/0.4.0.tar.gz
tar xzvf 0.4.0.tar.gz
cd bazel-0.4.0/
./compile.sh
sudo cp output/bazel /usr/bin/
which bazel
#配置bash_completion
bazel build //scripts:bazel-complete.bash
sudo cp bazel-bin/scripts/bazel-complete.bash /etc/bash_completion.d/
2.安装TensorFlow
#下载TensorFlow源代码
git clone https://github.com/tensorflow/tensorflow
cd tensorflow/
./configure
#Create the pip package and install
bazel build -c opt //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
sudo pip install /tmp/tensorflow_pkg/tensorflow-*-cp27-none-linux_x86_64.whl
3.安装kcws - 97.5%准确率的深度学习中文分词(字嵌入+Bi-LSTM+CRF)https://github.com/koth/kcws
git clone https://github.com/koth/kcws.git
cd kcws/
#下载语料库people2014.tar.gz
tar xzvf people2014.tar.gz # 解压到~/kcws/2014
./configure
#编译后台服务
bazel build //kcws/cc:seg_backend_api
python kcws/train/process_anno_file.py ./2014 chars_for_w2v.txt
bazel build third_party/word2vec:word2vec
#使用word2vec 训练 chars_for_w2v (注意-binary 0),得到字嵌入结果vec.txt
./bazel-bin/third_party/word2vec/word2vec -train chars_for_w2v.txt -output kcws/models/vec.txt -size 50 -sample 1e-4 -negative 5 -hs 1 -binary 0 -iter 5
bazel build kcws/train:generate_training
./bazel-bin/kcws/train/generate_training kcws/models/vec.txt ./ all.txt
python kcws/train/filter_sentence.py all.txt
python kcws/train/train_cws_lstm.py --word2vec_path ./kcws/models/vec.txt --train_data_path ./train.txt --test_data_path test.txt --max_sentence_len 80 --learning_rate 0.001