文档章节

Spark MLlib之使用Breeze操作矩阵向量

绝世武神
 绝世武神
发布于 2016/12/28 19:19
字数 1732
阅读 1099
收藏 1

在使用Breeze 库时,需要导入相关包:

import breeze.linalg._
import breeze.numerics._

Breeze创建函数

//全0矩阵
DenseMatrix.zeros[Double](3,2)

res0: breeze.linalg.DenseMatrix[Double] =
0.0  0.0  
0.0  0.0  
0.0  0.0  

//全0向量
DenseVector.zeros[Double](2)

res1: breeze.linalg.DenseVector[Double] = DenseVector(0.0, 0.0)

//全1向量
DenseVector.ones[Double](2)

res2: breeze.linalg.DenseVector[Double] = DenseVector(1.0, 1.0)

//按数值填充向量
DenseVector.fill[Double](3, 2)

res3: breeze.linalg.DenseVector[Double] = DenseVector(2.0, 2.0, 2.0)

//按指定步长生成等差向量(不含终点,并非随机向量)
DenseVector.range(1, 9, 2)
DenseVector.rangeD(1, 9, 2)
DenseVector.rangeF(1, 9, 2)

res4: breeze.linalg.DenseVector[Int] = DenseVector(1, 3, 5, 7)
res5: breeze.linalg.DenseVector[Double] = DenseVector(1.0, 3.0, 5.0, 7.0)
res6: breeze.linalg.DenseVector[Float] = DenseVector(1.0, 3.0, 5.0, 7.0)

//单位矩阵
DenseMatrix.eye[Double](4)

res7: breeze.linalg.DenseMatrix[Double] = 
1.0  0.0  0.0  0.0  
0.0  1.0  0.0  0.0  
0.0  0.0  1.0  0.0  
0.0  0.0  0.0  1.0  

//对角矩阵
diag(DenseVector(3.0, 4.0, 5.0))

res8: breeze.linalg.DenseMatrix[Double] = 
3.0  0.0  0.0  
0.0  4.0  0.0  
0.0  0.0  5.0 

//按照行创建矩阵
DenseMatrix((4.0, 5.0, 6.0), (7.0, 8.0, 9.0))

res9: breeze.linalg.DenseMatrix[Double] = 
4.0  5.0  6.0  
7.0  8.0  9.0  

//按照行创建向量(注意:此处多了一层括号,传入的是一个元组,得到的是只含一个元组元素的向量;若要创建数值向量,应写作 DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0))
DenseVector((4.0, 5.0, 6.0, 7.0, 8.0, 9.0))

res10: breeze.linalg.DenseVector[(Double, Double, Double, Double, Double, Double)] = DenseVector((4.0,5.0,6.0,7.0,8.0,9.0))

//向量转置
DenseVector((4.0, 5.0, 6.0, 7.0, 8.0, 9.0)).t

res11: breeze.linalg.Transpose[breeze.linalg.DenseVector[(Double, Double, Double, Double, Double, Double)]] = Transpose(DenseVector((4.0,5.0,6.0,7.0,8.0,9.0)))

//从函数创建向量
DenseVector.tabulate(5)(i => i*i)
DenseVector.tabulate(0 to 5)(i => i*i)

res12: breeze.linalg.DenseVector[Int] = DenseVector(0, 1, 4, 9, 16)
res13: breeze.linalg.DenseVector[Int] = DenseVector(0, 1, 4, 9, 16, 25)

//从函数创建矩阵
DenseMatrix.tabulate(3, 4){ case (i, j) => i*i+j*j }

res14: breeze.linalg.DenseMatrix[Int] = 
0  1  4  9   
1  2  5  10  
4  5  8  13  

//从数组创建向量
new DenseVector[Double](Array(2.0, 5.0, 8.0))

res15: breeze.linalg.DenseVector[Double] = DenseVector(2.0, 5.0, 8.0)

//从数组创建矩阵
new DenseMatrix[Double](3, 2, Array(1.0, 4.0, 7.0, 3.0, 6.0, 9.0))

res16: breeze.linalg.DenseMatrix[Double] = 
1.0  3.0  
4.0  6.0  
7.0  9.0  

//随机向量(Rand.uniform 为 0 到 1 的均匀分布,Rand.gaussian 为标准正态分布)
DenseVector.rand(9, Rand.uniform)
DenseVector.rand(9, Rand.gaussian)

res17: breeze.linalg.DenseVector[Double] = DenseVector(0.30960687979350654, 0.5779984012083466, 0.4880956198283952, 0.1013947992922748, 0.19635570812305936, 0.8533170989347008, 0.6619843996111201, 0.03131533370356321, 0.5430592884856604)
res18: breeze.linalg.DenseVector[Double] = DenseVector(0.48361471134641176, -1.734778260551877, -0.7319505628964431, 0.19971267958211184, -1.033191008131693, -1.7961545888066046, 0.2364555601503527, 0.22843047924270285, 1.7288956723034343)


//随机矩阵(Rand.uniform 为 0 到 1 的均匀分布,Rand.gaussian 为标准正态分布)
DenseMatrix.rand(3, 2, Rand.uniform)
DenseMatrix.rand(3, 2, Rand.gaussian)

res19: breeze.linalg.DenseMatrix[Double] = 
0.11270960774886585  0.19871332589909851  
0.5581898434134047   0.8295064603050235   
0.8692650535288642   0.4015512971620494   
res20: breeze.linalg.DenseMatrix[Double] = 
0.712041684728872     2.7007736007506216   
0.053520407807479485  0.19044772577405517  
-0.7370909025873376   -1.024737052742153   

Breeze元素访问

val a = new DenseVector[Int](Array(10 to 20: _*))

a: breeze.linalg.DenseVector[Int] = DenseVector(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

//指定位置
a(0)

res21: Int = 10

//向量子集
a(1 to 4)

res22: breeze.linalg.DenseVector[Int] = DenseVector(11, 12, 13, 14)

//按照指定步长取子集
a(5 to 0 by -1)

res23: breeze.linalg.DenseVector[Int] = DenseVector(15, 14, 13, 12, 11, 10)

//指定开始位置至结尾
a(1 to -1)

res24: breeze.linalg.DenseVector[Int] = DenseVector(11, 12, 13, 14, 15, 16, 17, 18, 19, 20)

//最后一个元素
a(-1)

res25: Int = 20

val m = DenseMatrix((1.0, 2.0, 3.0, 4.0), (5.0, 6.0, 7.0, 8.0), (9.0, 10.0, 11.0, 12.0))

m: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0   3.0   4.0   
5.0  6.0   7.0   8.0   
9.0  10.0  11.0  12.0  

//指定位置
m(0, 1)

res26: Double = 2.0

//矩阵指定列
m(::, 1)

res27: breeze.linalg.DenseVector[Double] = DenseVector(2.0, 6.0, 10.0)

Breeze元素操作

//调整矩阵形状
m.reshape(4, 3)

res28: breeze.linalg.DenseMatrix[Double] = 
1.0  6.0   11.0  
5.0  10.0  4.0   
9.0  3.0   8.0   
2.0  7.0   12.0 

//矩阵转成向量
m.toDenseVector

res29: breeze.linalg.DenseVector[Double] = DenseVector(1.0, 5.0, 9.0, 2.0, 6.0, 10.0, 3.0, 7.0, 11.0, 4.0, 8.0, 12.0)

//复制下三角
lowerTriangular(m)

res30: breeze.linalg.DenseMatrix[Double] = 
1.0  0.0   0.0   
5.0  6.0   0.0   
9.0  10.0  11.0  

//复制上三角
upperTriangular(m)

res31: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0  3.0   
0.0  6.0  7.0   
0.0  0.0  11.0  

//矩阵复制
m.copy

res32: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0   3.0   4.0   
5.0  6.0   7.0   8.0   
9.0  10.0  11.0  12.0  

//取对角线元素
diag(upperTriangular(m))

res33: breeze.linalg.DenseVector[Double] = DenseVector(1.0, 6.0, 11.0)

//子集赋数值
a(1 to 4) := 5
a

res34: breeze.linalg.DenseVector[Int] = DenseVector(5, 5, 5, 5)
res35: breeze.linalg.DenseVector[Int] = DenseVector(10, 5, 5, 5, 5, 15, 16, 17, 18, 19, 20)

//子集赋向量
a(1 to 4) := DenseVector(1, 2, 3, 4)
a

res36: breeze.linalg.DenseVector[Int] = DenseVector(1, 2, 3, 4)
res37: breeze.linalg.DenseVector[Int] = DenseVector(10, 1, 2, 3, 4, 15, 16, 17, 18, 19, 20)

//矩阵赋值
m(1 to 2,1 to 2) := 0.0
m

res38: breeze.linalg.DenseMatrix[Double] = 
0.0  0.0  
0.0  0.0  
res39: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0  3.0  4.0   
5.0  0.0  0.0  8.0   
9.0  0.0  0.0  12.0 

//矩阵列赋值
m(::, 2) := 5.0
m

res40: breeze.linalg.DenseVector[Double] = DenseVector(5.0, 5.0, 5.0)
res41: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0  5.0  4.0   
5.0  0.0  5.0  8.0   
9.0  0.0  5.0  12.0  

val a1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
val a2 = DenseMatrix((7.0, 8.0, 9.0), (10.0, 11.0, 12.0))

a1: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0  3.0  
4.0  5.0  6.0  
a2: breeze.linalg.DenseMatrix[Double] = 
7.0   8.0   9.0   
10.0  11.0  12.0 

//垂直连接矩阵
DenseMatrix.vertcat(a1, a2)

res42: breeze.linalg.DenseMatrix[Double] = 
1.0   2.0   3.0   
4.0   5.0   6.0   
7.0   8.0   9.0   
10.0  11.0  12.0 

//横向连接矩阵
DenseMatrix.horzcat(a1, a2)

res43: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0  3.0  7.0   8.0   9.0   
4.0  5.0  6.0  10.0  11.0  12.0  

//向量连接
DenseVector.vertcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
DenseVector.horzcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))

res44: breeze.linalg.DenseVector[Int] = DenseVector(20, 21, 22, 23, 24, 25)
res45: breeze.linalg.DenseMatrix[Int] = 
20  23  
21  24  
22  25 

Breeze数值计算函数

//元素加法
a1 + a2

res46: breeze.linalg.DenseMatrix[Double] = 
8.0   10.0  12.0  
14.0  16.0  18.0  

//元素乘法
a1 :* a2

res47: breeze.linalg.DenseMatrix[Double] = 
7.0   16.0  27.0  
40.0  55.0  72.0  

//元素除法
a1 :/ a2

res48: breeze.linalg.DenseMatrix[Double] = 
0.14285714285714285  0.25                 0.3333333333333333  
0.4                  0.45454545454545453  0.5

//元素比较
a1 :< a2

res49: breeze.linalg.DenseMatrix[Boolean] = 
true  true  true  
true  true  true 

//元素相等
a1 :== a2

res50: breeze.linalg.DenseMatrix[Boolean] = 
false  false  false  
false  false  false 

//元素追加
a1 :+=2.0

res51: breeze.linalg.DenseMatrix[Double] = 
3.0  4.0  5.0  
6.0  7.0  8.0 

//元素追乘
a1 :*=2.0

res52: breeze.linalg.DenseMatrix[Double] = 
6.0   8.0   10.0  
12.0  14.0  16.0

//向量点积
DenseVector(1, 2, 3, 4) dot DenseVector(1, 1, 1, 1)

res53: Int = 10

//元素最大值
max(a1)

res54: Double = 16.0

//元素最小值
min(a1)

res55: Double = 6.0

//元素最大值的位置
argmax(a1)

res56: (Int, Int) = (1,2)

//元素最小值的位置
argmin(a1)

res57: (Int, Int) = (0,0)

Breeze求和函数

val m1 = DenseMatrix((1.0, 2.0, 3.0, 4.0), (5.0, 6.0, 7.0, 8.0), (9.0, 10.0, 11.0, 12.0))

m1: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0   3.0   4.0   
5.0  6.0   7.0   8.0   
9.0  10.0  11.0  12.0  

//元素求和
sum(m1)

res58: Double = 78.0

//每一列求和
sum(m1, Axis._0)

res59: breeze.linalg.DenseMatrix[Double] = 15.0  18.0  21.0  24.0

//每一行求和
sum(m1, Axis._1)

res60: breeze.linalg.DenseVector[Double] = DenseVector(10.0, 26.0, 42.0)

//对角线元素和
trace(lowerTriangular(m1))

res61: Double = 18.0

//累积和
val a3 = new DenseVector[Int](Array(10 to 20: _*))
accumulate(a3)

a3: breeze.linalg.DenseVector[Int] = DenseVector(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)
res62: breeze.linalg.DenseVector[Int] = DenseVector(10, 21, 33, 46, 60, 75, 91, 108, 126, 145, 165)

Breeze布尔函数

val c = DenseVector(true, false, true)
val d = DenseVector(false, true, true)
//元素与操作
c :& d

res63: breeze.linalg.DenseVector[Boolean] = DenseVector(false, false, true)

//元素或操作
c :| d

res64: breeze.linalg.DenseVector[Boolean] = DenseVector(true, true, true)

//元素非操作
!c

res65: breeze.linalg.DenseVector[Boolean] = DenseVector(false, true, false)

val e = DenseVector[Int](-3, 0, 2)

e: breeze.linalg.DenseVector[Int] = DenseVector(-3, 0, 2)

//存在非零元素
any(e)

res66: Boolean = true

//所有元素非零
all(e)

res67: Boolean = false

Breeze线性代数函数

val f = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
val g = DenseMatrix((1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0))

f: breeze.linalg.DenseMatrix[Double] = 
1.0  2.0  3.0  
4.0  5.0  6.0  
7.0  8.0  9.0  
g: breeze.linalg.DenseMatrix[Double] = 
1.0  1.0  1.0  
1.0  1.0  1.0  
1.0  1.0  1.0  

//线性求解,AX = B,求解X
f \ g

res68: breeze.linalg.DenseMatrix[Double] = 
-2.5  -2.5  -2.5  
4.0   4.0   4.0   
-1.5  -1.5  -1.5

//转置
f.t

res69: breeze.linalg.DenseMatrix[Double] = 
1.0  4.0  7.0  
2.0  5.0  8.0  
3.0  6.0  9.0 

//求行列式
det(f)

res70: Double = 6.661338147750939E-16

//求逆
inv(f)

res71: breeze.linalg.DenseMatrix[Double] = 
-4.503599627370499E15  9.007199254740992E15    -4.503599627370495E15   
9.007199254740998E15   -1.8014398509481984E16  9.007199254740991E15    
-4.503599627370498E15  9.007199254740992E15    -4.5035996273704955E15

//求伪逆
pinv(f)

res72: breeze.linalg.DenseMatrix[Double] = 
-3.7720834019330525E14  7.544166803866101E14    -3.77208340193305E14   
7.544166803866094E14    -1.5088333607732208E15  7.544166803866108E14   
-3.772083401933041E14   7.544166803866104E14    -3.772083401933055E14  

//特征值和特征向量
eig(f)

res73: breeze.linalg.eig.DenseEig = 
Eig(DenseVector(16.116843969807043, -1.1168439698070427, -1.3036777264747022E-15),
   DenseVector(0.0, 0.0, 0.0),
-0.23197068724628617  -0.7858302387420671   0.40824829046386363  
-0.5253220933012336   -0.08675133925662833  -0.816496580927726   
-0.8186734993561815   0.61232756022881      0.4082482904638625  )

//奇异值分解
val svd.SVD(u,s,v) = svd(g)

u: breeze.linalg.DenseMatrix[Double] = 
-0.5773502691896255  -0.5773502691896257  -0.5773502691896256   
-0.5773502691896256  -0.2113248654051871  0.7886751345948126    
-0.5773502691896256  0.7886751345948129   -0.21132486540518708  
s: breeze.linalg.DenseVector[Double] = DenseVector(3.0000000000000004, 0.0, 0.0)
v: breeze.linalg.DenseMatrix[Double] = 
-0.5773502691896256  -0.5773502691896257  -0.5773502691896256  
0.0                  -0.7071067811865474  0.7071067811865477   
0.816496580927726    -0.4082482904638629  -0.4082482904638628

//求矩阵的秩
rank(f)

res74: Int = 2

//矩阵长度
f.size

res75: Int = 9

//矩阵行数
f.rows

res76: Int = 3

//矩阵列数
f.cols

res77: Int = 3

Breeze取整函数

val h = DenseVector(-1.2, 0.7, 2.3)

h: breeze.linalg.DenseVector[Double] = DenseVector(-1.2, 0.7, 2.3)

//四舍五入
round(h)

res78: breeze.linalg.DenseVector[Long] = DenseVector(-1, 1, 2)

//向上取整(不小于它的最小整数)
ceil(h)

res79: breeze.linalg.DenseVector[Double] = DenseVector(-1.0, 1.0, 3.0)

//向下取整(不大于它的最大整数)
floor(h)

res80: breeze.linalg.DenseVector[Double] = DenseVector(-2.0, 0.0, 2.0)

//符号函数
signum(h)

res81: breeze.linalg.DenseVector[Double] = DenseVector(-1.0, 1.0, 1.0)

//取绝对值
abs(h)

res82: breeze.linalg.DenseVector[Double] = DenseVector(1.2, 0.7, 2.3)

© 著作权归作者所有

绝世武神
粉丝 20
博文 33
码字总数 48343
作品 0
海淀
程序员
私信 提问
基于Spark的机器学习实践 (二) - 初识MLlib

1 MLlib概述 1.1 MLlib 介绍 ◆ 是基于Spark core的机器学习库,具有Spark的优点 ◆ 底层计算经过优化,比常规编码效率往往要高 ◆ 实现了多种机器学习算法,可以进行模型训练及预测 1.2 Spark ...

javaedge
04/09
0
0
spark机器学习中的基本数据类型

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/on2way/article/details/84670783 今天开始记录spark中机器学习的相关应用。 spark某种意义上讲就是为机器学习...

我i智能
2018/12/01
0
0
Spark on Angel:Spark机器学习的核心加速器

Spark的核心概念是RDD,而RDD的关键特性之一是其不可变性,来规避分布式环境下复杂的各种并行问题。这个抽象,在数据分析的领域是没有问题的,它能最大化的解决分布式问题,简化各种算子的复...

腾讯开源
2017/08/01
3
0
地铁译:Spark for python developers ---Spark与数据的机器学习

机器学习可以从数据中得到有用的见解. 目标是纵观Spark MLlib,采用合适的算法从数据集中生成见解。对于 Twitter的数据集, 采用非监督集群算法来区分与Apache
Spark相关的tweets . 初始输入...

abel_cao
01/17
0
0
Spark的39个机器学习库-中文

//Apache Spark 本身// 1.MLlib >AMPLab Spark最初诞生于伯克利 AMPLab实验室,如今依然还是AMPLab所致力的项目,尽管这些不处于Apache Spark Foundation中,但是依然在你日常的github项目中...

MoksMo
2015/11/04
854
0

没有更多内容

加载失败,请刷新页面

加载更多

Giraph源码分析(八)—— 统计每个SuperStep中参与计算的顶点数目

作者|白松 目的:科研中,需要分析在每次迭代过程中参与计算的顶点数目,来进一步优化系统。比如,在SSSP的compute()方法最后一行,都会把当前顶点voteToHalt,即变为InActive状态。所以每次...

数澜科技
今天
4
0
Xss过滤器(Java)

问题 最近旧的系统,遇到Xss安全问题。这个系统采用用的是spring mvc的maven工程。 解决 maven依赖配置 <properties><easapi.version>2.2.0.0</easapi.version></properties><dependenci......

亚林瓜子
今天
10
0
Navicat 快捷键

操作 结果 ctrl+q 打开查询窗口 ctrl+/ 注释sql语句 ctrl+shift +/ 解除注释 ctrl+r 运行查询窗口的sql语句 ctrl+shift+r 只运行选中的sql语句 F6 打开一个mysql命令行窗口 ctrl+l 删除一行 ...

低至一折起
今天
9
0
Set 和 Map

Set 1:基本概念 类数组对象, 内部元素唯一 let set = new Set([1, 2, 3, 2, 1]); console.log(set); // Set(3){ 1, 2, 3 } [...set]; // [1, 2, 3] 接收数组或迭代器对象 ...

凌兮洛
今天
4
0
PyTorch入门笔记一

张量 引入pytorch,生成一个随机的5x3张量 >>> from __future__ import print_function>>> import torch>>> x = torch.rand(5, 3)>>> print(x)tensor([[0.5555, 0.7301, 0.5655],......

仪山湖
今天
6
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部