在下面的练习中,需要自行将 Spark 的 jar 包添加到工程依赖中。
1. Spark MLlib 底层的向量、矩阵运算使用了 Breeze 库。
ScalaNLP 是一套机器学习和数值计算的库。它主要面向科学计算(SC)、机器学习(ML)和自然语言处理(NLP)。它包括三个库:Breeze、Epic 和 Puck。
Breeze:是机器学习和数值计算库,它是 Spark MLlib 的核心,包括线性代数、数值计算和优化,是一种通用、功能强大、高效的机器学习工具库。
Epic:是一种高性能统计分析器和结构化预测库
Puck :是一个快速GPU加速解析器
在使用Breeze 库时,需要导入相关包:
import breeze.linalg._
import breeze.numerics._
具体练习如下:
package leaning
import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand
/**
 * Runnable tour of the Breeze linear-algebra API: construction, element access,
 * reshaping, arithmetic, reductions, boolean operations, linear algebra and rounding.
 *
 * Each private `demo...` method prints the results of one family of operations; the
 * expected console output is recorded in the comment next to each call.
 *
 * Created by dy9776 on 2017/12/5.
 */
object Practise_breeze {

  /**
   * Entry point: runs every demo section in its original order.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    demoCreation()
    demoAccessAndManipulation()
    demoConcatenationAndArithmetic()
    demoSums()
    demoBooleans()
    demoLinearAlgebra()
    demoRounding()
  }

  /** Constructors for dense vectors and dense matrices. */
  private def demoCreation(): Unit = {
    // All-zero 3x2 matrix.
    val zeroMatrix: DenseMatrix[Double] = DenseMatrix.zeros[Double](3, 2)
    println(zeroMatrix)
    /*
    0.0 0.0
    0.0 0.0
    0.0 0.0
    */

    // All-zero vector.
    val zeroVector: DenseVector[Double] = DenseVector.zeros[Double](2)
    println(zeroVector) // DenseVector(0.0, 0.0)

    // All-one vector.
    val oneVector = DenseVector.ones[Double](2)
    println(oneVector) // DenseVector(1.0, 1.0)

    // Vector of a given length filled with one value.
    val filledVector = DenseVector.fill[Double](3, 2.0)
    println(filledVector) // DenseVector(2.0, 2.0, 2.0)

    // Range vectors (start, exclusive end, step) -- these are NOT random.
    val intRange = DenseVector.range(1, 10, 2)    // DenseVector(1, 3, 5, 7, 9)
    val doubleRange = DenseVector.rangeD(1, 9, 2) // DenseVector(1.0, 3.0, 5.0, 7.0)
    val floatRange = DenseVector.rangeF(1, 7, 2)  // DenseVector(1.0, 3.0, 5.0)
    println(intRange)
    println(doubleRange)
    println(floatRange)

    // Identity matrix (left unprinted, as in the original).
    val identity = DenseMatrix.eye[Double](4)
    // println(identity)
    /*
    1.0 0.0 0.0 0.0
    0.0 1.0 0.0 0.0
    0.0 0.0 1.0 0.0
    0.0 0.0 0.0 1.0
    */

    // Diagonal matrix built from a vector (left unprinted).
    val diagonal = diag(DenseVector(3.0, 4.0, 5.0))
    // println(diagonal)
    /*
    3.0 0.0 0.0
    0.0 4.0 0.0
    0.0 0.0 5.0
    */

    // Matrix from row tuples (left unprinted).
    val fromRows = DenseMatrix((4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    // println(fromRows)
    /*
    4.0 5.0 6.0
    7.0 8.0 9.0
    */

    // Vector from explicit values. The values must be passed as varargs: wrapping them in
    // an extra pair of parentheses builds a one-element vector holding a single tuple.
    val fromValues = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
    // println(fromValues) // DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    // Vector transpose.
    val transposed = fromValues.t
    println(transposed) // Transpose(DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0))

    // Vector from a function of the index.
    val squares = DenseVector.tabulate(5)(i => i * i)
    println(squares) // DenseVector(0, 1, 4, 9, 16)
    val squaresOverRange = DenseVector.tabulate(0 to 5)(i => i * i)
    println(squaresOverRange) // DenseVector(0, 1, 4, 9, 16, 25)

    // Matrix from a function of (row, column) (left unprinted).
    val tabulated = DenseMatrix.tabulate(3, 4) { case (i, j) => i * i + j * j }
    // println(tabulated)
    /*
    0 1 4 9
    1 2 5 10
    4 5 8 13
    */

    // Matrix from a flat array; the array is read column by column (left unprinted).
    val fromArray = new DenseMatrix[Double](3, 2, Array(1.0, 4.0, 7.0, 3.0, 6.0, 9.0))
    // println(fromArray)
    /*
    1.0 3.0
    4.0 6.0
    7.0 9.0
    */

    // Random vectors drawn from Rand.uniform; the values differ on every run and
    // between the two calls.
    val uniformVector = DenseVector.rand(9, Rand.uniform)
    println(uniformVector)
    val uniformVector2 = DenseVector.rand(9, Rand.uniform)
    println(uniformVector2)

    // Random 3x2 matrix drawn from Rand.uniform.
    val uniformMatrix = DenseMatrix.rand(3, 2, Rand.uniform)
    println(uniformMatrix)

    // Random 3x2 matrix drawn from Rand.gaussian; unlike the uniform case the entries
    // can be negative (the original author's run printed values such as -0.52 and -0.90).
    val gaussianMatrix = DenseMatrix.rand(3, 2, Rand.gaussian)
    println(gaussianMatrix)
  }

  /** Element access, slicing, reshaping and in-place assignment on a vector and a matrix. */
  private def demoAccessAndManipulation(): Unit = {
    // ---- Element access ----
    val a = new DenseVector[Int]((1 to 20).toArray)
    println(a) // DenseVector(1, 2, 3, ..., 19, 20)

    // Single position.
    println(a(0)) // 1

    // Sub-vectors: `to` is inclusive, `until` is exclusive.
    println(a(1 to 4))    // DenseVector(2, 3, 4, 5)
    println(a(1 until 4)) // DenseVector(2, 3, 4)

    // From a start index to the end (-1 addresses the last element).
    println(a(1 to -1)) // DenseVector(2, 3, 4, ..., 19, 20)

    // Slice with a step; a negative step walks backwards.
    println(a(5 to 0 by -1)) // DenseVector(6, 5, 4, 3, 2, 1)

    // Last element.
    println(a(-1)) // 20

    val m = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    println(m)
    /*
    1.0 2.0 3.0
    4.0 5.0 6.0
    */

    // Single position (row, column).
    println(m(0, 1)) // 2.0

    // One whole column.
    println(m(::, 1)) // DenseVector(2.0, 5.0)

    // ---- Element manipulation ----
    // Reshape 2x3 -> 3x2; elements are taken in column order.
    val reshaped = m.reshape(3, 2)
    println(reshaped)
    /*
    1.0 5.0
    4.0 3.0
    2.0 6.0
    */

    // Flatten the matrix to a vector (column order), then back to a one-row matrix.
    val flattened = m.toDenseVector
    println(flattened)               // DenseVector(1.0, 4.0, 2.0, 5.0, 3.0, 6.0)
    println(flattened.toDenseMatrix) // 1.0 4.0 2.0 5.0 3.0 6.0

    // Lower / upper triangle copies. For this 2x3 input the printed result is 2x2
    // (the third column is dropped) -- presumably the helpers size the result by the
    // row count; check the Breeze docs before using them on non-square input.
    println(lowerTriangular(m))
    /*
    1.0 0.0
    4.0 5.0
    */
    println(upperTriangular(m))
    /*
    1.0 2.0
    0.0 5.0
    */

    // Matrix copy; prints the same contents as m.
    println(m.copy)
    /*
    1.0 2.0 3.0
    4.0 5.0 6.0
    */

    // Diagonal of the 2x2 upper triangle above, hence 1.0 and 5.0 (not 6.0).
    println(diag(upperTriangular(m))) // DenseVector(1.0, 5.0)

    // Assign a scalar to a slice; `:=` yields the slice that was written.
    println(a(1 to 4) := 5) // DenseVector(5, 5, 5, 5)

    // Assign a vector to a slice.
    println(a(1 to 4) := DenseVector(1, 2, 3, 4)) // DenseVector(1, 2, 3, 4)

    // m has not been modified so far.
    println(m)

    // Assign a scalar to a sub-matrix. Indices are 0-based and `to` is inclusive, so on
    // this 2x3 matrix the row range `1 to 2` is out of bounds:
    //   m(1 to 2, 1 to 2) := 0.0
    //   => java.lang.IndexOutOfBoundsException: Row slice of Range(1, 2) was bigger than matrix rows of 2
    println("-==========m1================-")
    println(m(0 to 1, 1 to 2) := 0.0)
    println("-==========m================-")
    println(m)
    println("-==========m end================-")
    /*
    -==========m1================-
    0.0 0.0
    0.0 0.0
    -==========m================-
    1.0 0.0 0.0
    4.0 0.0 0.0
    -==========m end================-
    */

    // Assign a scalar to a whole column; the result is that column as a vector.
    val assignedColumn = m(::, 2) := 5.0
    println(assignedColumn.toDenseMatrix) // 5.0 5.0
  }

  /**
   * Concatenation, element-wise arithmetic / comparison, in-place updates, dot product
   * and min/max lookups.
   *
   * NOTE(review): the colon-prefixed operators (`:*`, `:/`, `:<`, `:+=`, `:*=`) are kept
   * exactly as the original wrote them; later Breeze releases may prefer other spellings
   * -- confirm against the Breeze version on the classpath before upgrading.
   */
  private def demoConcatenationAndArithmetic(): Unit = {
    val a1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    val a2 = DenseMatrix((7.0, 8.0, 9.0), (10.0, 11.0, 12.0))

    // Stack matrices vertically.
    val stacked = DenseMatrix.vertcat(a1, a2)
    println(stacked)
    println("-==========================-")
    /*
    1.0  2.0  3.0
    4.0  5.0  6.0
    7.0  8.0  9.0
    10.0 11.0 12.0
    */

    // Join matrices side by side.
    val sideBySide = DenseMatrix.horzcat(a1, a2)
    println(sideBySide)
    println("-==========================-")
    /*
    1.0 2.0 3.0 7.0  8.0  9.0
    4.0 5.0 6.0 10.0 11.0 12.0
    */

    // Vector concatenation: vertcat yields one longer vector, horzcat yields a matrix
    // whose columns are the input vectors.
    val joinedVector = DenseVector.vertcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    val vectorsAsColumns = DenseVector.horzcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    println(joinedVector) // DenseVector(20, 21, 22, 23, 24, 25)
    println(vectorsAsColumns)
    /*
    20 23
    21 24
    22 25
    */

    // ---- Numeric functions ----
    // Element-wise addition.
    println(a1 + a2)
    /*
    8.0  10.0 12.0
    14.0 16.0 18.0
    */

    // Element-wise multiplication.
    println(a1 :* a2)
    /*
    7.0  16.0 27.0
    40.0 55.0 72.0
    */

    // Element-wise division.
    println(a1 :/ a2)
    /*
    0.14285714285714285 0.25                0.3333333333333333
    0.4                 0.45454545454545453 0.5
    */

    // Element-wise "less than".
    println(a1 :< a2)
    /*
    true true true
    true true true
    */

    // Element-wise equality.
    println(a1 :== a2)
    /*
    false false false
    false false false
    */

    // In-place addition of a scalar (mutates a1).
    println(a1 :+= 2.0)
    /*
    3.0 4.0 5.0
    6.0 7.0 8.0
    */

    // In-place multiplication by a scalar (mutates a1 again).
    println(a1 :*= 2.0)
    /*
    6.0  8.0  10.0
    12.0 14.0 16.0
    */

    // Dot product.
    val dotProduct = DenseVector(1, 2, 3, 4) dot DenseVector(1, 1, 1, 1)
    println(dotProduct) // 10

    // Largest / smallest element of the (already mutated) a1.
    println(max(a1)) // 16.0
    println(min(a1)) // 6.0

    // Position of the largest / smallest element.
    println(argmax(a1)) // (1,2)
    println(argmin(a1)) // (0,0)
  }

  /** Reductions: total sum, per-axis sums, trace and cumulative sum. */
  private def demoSums(): Unit = {
    val m1 = DenseMatrix((1.0, 2.0, 3.0, 4.0), (5.0, 6.0, 7.0, 8.0), (9.0, 10.0, 11.0, 12.0))
    println(m1)
    /*
    1.0 2.0  3.0  4.0
    5.0 6.0  7.0  8.0
    9.0 10.0 11.0 12.0
    */
    println("-==========================-")

    // Sum of all elements.
    println(sum(m1)) // 78.0

    // Sum of each column: 15.0 18.0 21.0 24.0
    println(sum(m1, Axis._0))

    // Sum of each row.
    println(sum(m1, Axis._1)) // DenseVector(10.0, 26.0, 42.0)

    // Sum of the diagonal: 1.0 + 6.0 + 11.0.
    println(trace(lowerTriangular(m1))) // 18.0

    // Cumulative sum.
    val a3 = new DenseVector[Int]((10 to 20).toArray)
    println(accumulate(a3)) // DenseVector(10, 21, 33, 46, 60, 75, 91, 108, 126, 145, 165)
  }

  /** Element-wise boolean operators and the `any` / `all` predicates. */
  private def demoBooleans(): Unit = {
    val c = DenseVector(true, false, true)
    val d = DenseVector(false, true, true)

    // Element-wise AND.
    println(c :& d) // DenseVector(false, false, true)
    // Element-wise OR.
    println(c :| d) // DenseVector(true, true, true)
    // Element-wise NOT.
    println(!c) // DenseVector(false, true, false)

    val e = DenseVector[Int](-3, 0, 2)
    // True when at least one element is non-zero.
    println(any(e)) // true
    // True only when every element is non-zero.
    println(all(e)) // false
  }

  /**
   * Linear-algebra functions: solve, transpose, determinant, inverse, pseudo-inverse,
   * eigen decomposition, SVD, rank and dimensions.
   *
   * The sample matrix `f` is singular (its printed determinant is ~6.66e-16 and its rank
   * is 2), so the original author's runs of `f \ g`, `inv(f)` and `pinv(f)` printed
   * rounding-noise results with entries up to ~1e16. Treat those three outputs as
   * demonstrations of the call syntax only.
   */
  private def demoLinearAlgebra(): Unit = {
    val f = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    val g = DenseMatrix((1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0))

    // Linear solve: find X such that f * X = g.
    println(f \ g)
    /*
    -2.5 -2.5 -2.5
     4.0  4.0  4.0
    -1.5 -1.5 -1.5
    */

    // Transpose.
    println(f.t)
    /*
    1.0 4.0 7.0
    2.0 5.0 8.0
    3.0 6.0 9.0
    */

    // Determinant (not eigenvalues): effectively zero for this matrix.
    println(det(f)) // 6.661338147750939E-16

    // Inverse.
    println(inv(f))

    // Pseudo-inverse.
    println(pinv(f))

    // Eigenvalues and eigenvectors. The original run printed the eigenvalues
    // DenseVector(16.116843969807043, -1.1168439698070427, -1.3036777264747022E-15),
    // an all-zero second vector, and a 3x3 eigenvector matrix.
    println(eig(f))

    // Singular value decomposition of g.
    val svd.SVD(u, s, v) = svd(g)
    println(u)
    /*
    -0.5773502691896255 -0.5773502691896257  -0.5773502691896256
    -0.5773502691896256 -0.2113248654051871   0.7886751345948126
    -0.5773502691896256  0.7886751345948129  -0.21132486540518708
    */
    println("==============================")
    println(s) // DenseVector(3.0000000000000004, 0.0, 0.0)
    println("==============================")
    println(v)
    /*
    -0.5773502691896256 -0.5773502691896257 -0.5773502691896256
     0.0                -0.7071067811865474  0.7071067811865477
     0.816496580927726  -0.4082482904638629 -0.4082482904638628
    */

    // Matrix rank.
    println(rank(f)) // 2
    // Total number of elements.
    println(f.size) // 9
    // Number of rows.
    println(f.rows) // 3
    // Number of columns (the original evaluated this without printing it).
    println(f.cols) // 3
  }

  /** Element-wise rounding, sign and absolute-value functions. */
  private def demoRounding(): Unit = {
    val h = DenseVector(-1.2, 0.7, 2.3)

    // Round to nearest; the result holds Long values.
    println(round(h)) // DenseVector(-1, 1, 2)
    // Smallest integer value not less than each element.
    println(ceil(h)) // DenseVector(-1.0, 1.0, 3.0)
    // Largest integer value not greater than each element.
    println(floor(h)) // DenseVector(-2.0, 0.0, 2.0)
    // Sign of each element.
    println(signum(h)) // DenseVector(-1.0, 1.0, 1.0)
    // Absolute value.
    println(abs(h)) // DenseVector(1.2, 0.7, 2.3)
  }
}