在 R 编程中使用稀疏矩阵

稀疏矩阵是稀疏填充的元素集合，其中非空元素的数量非常少。在完全密集的矩阵中存储稀疏填充的数据会导致时间和空间的复杂性增加。因此，优化了数据结构以更有效地存储这些数据并减少元素的访问时间。

创建稀疏矩阵

R 自带的 Matrix 包提供了用于创建和处理稀疏矩阵的类。

library(Matrix)

以下代码片段说明了 Matrix 库的用法：

R

# Load the Matrix package (provides the sparse matrix classes)
library(Matrix)

# Declare an all-zero sparse matrix with 1000 rows and 1000 columns
mat1 <- Matrix(0, nrow = 1000, ncol = 1000, sparse = TRUE)

# Set the element in row 1, column 1 to 5.
# Two-dimensional [row, col] indexing states the intent directly;
# chained mat1[1][1] only works by going through linear indexing.
mat1[1, 1] <- 5

# Report how much memory the sparse representation occupies
print("Size of sparse mat1")
print(object.size(mat1))

R

# Attach the Matrix package for the sparse matrix classes
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#   0 with probability 0.80
#   6 with probability 0.10
#   7 with probability 0.10
set.seed(0)
vals <- sample(c(0, 6, 7), size = rows * cols, replace = TRUE,
               prob = c(0.8, 0.1, 0.1))

# Fill a rows x cols dense matrix column by column
dense_mat <- array(vals, dim = c(rows, cols))
print("Dense Matrix")
print(dense_mat)

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

R

# Attach the Matrix package
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
vals <- sample(c(0, 10), size = rows * cols, replace = TRUE,
               prob = c(0.85, 0.15))
dense_mat <- array(vals, dim = c(rows, cols))

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Add the scalar 5 to every element (zeros included)
print("Addition")
print(sparse_mat + 5)

# Subtract the scalar 1 from every element (zeros included)
print("Subtraction")
print(sparse_mat - 1)

R

# Attach the Matrix package; as(..., "sparseMatrix") below needs it,
# so the snippet must load it to run in a fresh session.
library(Matrix)

# Construct a matrix with values
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
rows <- 4L
cols <- 6L
vals <- sample(
  x = c(0, 10),
  prob = c(0.85, 0.15),
  size = rows * cols,
  replace = TRUE
)
dense_mat <- matrix(vals, nrow = rows)

# Convert to sparse
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Multiply every element of the sparse matrix by the scalar 10
print("Multiplication")
print(sparse_mat * 10)

# Divide every element of the sparse matrix by the scalar 10
print("Division")
print(sparse_mat / 10)

R

# Attach the Matrix package
library(Matrix)

# Construct a matrix with values
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
rows <- 4L
cols <- 6L
vals <- sample(
  x = c(0, 10),
  prob = c(0.85, 0.15),
  size = rows * cols,
  replace = TRUE
)
dense_mat <- matrix(vals, nrow = rows)

# Convert to sparse
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Transpose of the sparse matrix (cols x rows)
transpose_mat <- t(sparse_mat)

# Matrix product of the matrix and its transpose; the column count of
# the left operand equals the row count of the right operand.
mul_mat <- sparse_mat %*% transpose_mat
print("Multiplication of Matrices")
print(mul_mat)

R

# Attach the Matrix package
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
vals <- sample(c(0, 10), size = rows * cols, replace = TRUE,
               prob = c(0.85, 0.15))
dense_mat <- array(vals, dim = c(rows, cols))

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Element-wise product with a vector. The vector is recycled down the
# columns, so with 4 rows and length 2, rows 1 and 3 are scaled by 3
# and rows 2 and 4 are scaled by 2.
vec <- c(3, 2)
print("Multiplication by vector")
print(sparse_mat * vec)

R

# Attach the Matrix package
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
vals <- sample(c(0, 10), size = rows * cols, replace = TRUE,
               prob = c(0.85, 0.15))
dense_mat <- array(vals, dim = c(rows, cols))

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Stack the sparse matrix on top of itself (row-wise combination)
row_bind <- rbind(sparse_mat, sparse_mat)

# Show the stacked result
print("Row Bind")
print(row_bind)

R

# Attach the Matrix package
library(Matrix)

# Build the 3 x 2 source matrix one column at a time;
# the last row holds NA in both columns.
mat <- cbind(c(5.5, 0, NA), c(0, 0, NA))
print("Original Matrix")
print(mat)

# Coerce to a sparse matrix and show how the NA entries are kept
sparse_mat <- as(mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

输出：

[1] "Size of sparse mat1"
5440 bytes

稀疏矩阵占用的空间大大减少，因为它只存储非零值。

从密集构造稀疏矩阵

可以通过 R 中的内置 matrix() 命令简单地创建密集矩阵。然后将该密集矩阵作为输入传给 R 自带的 as() 函数，把它转换为稀疏矩阵。该函数具有以下签名：

Syntax: as(dense_matrix, type = )

Parameters:

dense_matrix : A numeric or logical array.

type : Default evaluates to dgCMatrix, in case we mention sparseMatrix. This converts the matrix to compressed sparse column( CSC ) format. The other type available is the dgRMatrix, which converts the dense matrix in sparse row format.

编程需要懂一点英语

下面的代码片段表示稠密矩阵到稀疏矩阵的转换：

R

# Attach the Matrix package for the sparse matrix classes
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#   0 with probability 0.80
#   6 with probability 0.10
#   7 with probability 0.10
set.seed(0)
vals <- sample(c(0, 6, 7), size = rows * cols, replace = TRUE,
               prob = c(0.8, 0.1, 0.1))

# Fill a rows x cols dense matrix column by column
dense_mat <- array(vals, dim = c(rows, cols))
print("Dense Matrix")
print(dense_mat)

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

输出：

[1] "Dense Matrix"
    [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    7    6    0    0    0    0
[2,]    0    0    0    0    0    6
[3,]    0    7    0    0    6    0
[4,]    0    6    0    0    0    0
[1] "Sparse Matrix"
4 x 6 sparse Matrix of class "dgCMatrix"
               
[1,] 7 6 . . . .
[2,] . . . . . 6
[3,] . 7 . . 6 .
[4,] . 6 . . . .

稀疏矩阵运算

可以对稀疏矩阵执行各种算术和绑定操作：

标量值加减法

将标量值与稀疏矩阵的所有元素相加或相减。结果矩阵是一个稠密矩阵，因为所有元素都对标量值进行运算。以下代码表示 + 或 –运算符的用法：

R

# Attach the Matrix package
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
vals <- sample(c(0, 10), size = rows * cols, replace = TRUE,
               prob = c(0.85, 0.15))
dense_mat <- array(vals, dim = c(rows, cols))

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Add the scalar 5 to every element (zeros included)
print("Addition")
print(sparse_mat + 5)

# Subtract the scalar 1 from every element (zeros included)
print("Subtraction")
print(sparse_mat - 1)

输出：

[1] "Sparse Matrix"
4 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 10 10 . .  .  .
[2,]  .  . . .  . 10
[3,]  . 10 . . 10  .
[4,]  . 10 . .  .  .
[1] "Addition"
4 x 6 Matrix of class "dgeMatrix"
    [,1] [,2] [,3] [,4] [,5] [,6]
[1,]   15   15    5    5    5    5
[2,]    5    5    5    5    5   15
[3,]    5   15    5    5   15    5
[4,]    5   15    5    5    5    5
[1] "Subtraction"
4 x 6 Matrix of class "dgeMatrix"
    [,1] [,2] [,3] [,4] [,5] [,6]
[1,]    9    9   -1   -1   -1   -1
[2,]   -1   -1   -1   -1   -1    9
[3,]   -1    9   -1   -1    9   -1
[4,]   -1    9   -1   -1   -1   -1

标量的乘法或除法

这些操作对矩阵的所有非零元素执行。结果矩阵是一个稀疏矩阵：

R

# Attach the Matrix package; as(..., "sparseMatrix") below needs it,
# so the snippet must load it to run in a fresh session.
library(Matrix)

# Construct a matrix with values
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
rows <- 4L
cols <- 6L
vals <- sample(
  x = c(0, 10),
  prob = c(0.85, 0.15),
  size = rows * cols,
  replace = TRUE
)
dense_mat <- matrix(vals, nrow = rows)

# Convert to sparse
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Multiply every element of the sparse matrix by the scalar 10
print("Multiplication")
print(sparse_mat * 10)

# Divide every element of the sparse matrix by the scalar 10
print("Division")
print(sparse_mat / 10)

输出：

[1] "Sparse Matrix"
4 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 10 10 . .  .  .
[2,]  .  . . .  . 10
[3,]  . 10 . . 10  .
[4,]  . 10 . .  .  .
[1] "Multiplication"
4 x 6 sparse Matrix of class "dgCMatrix"
                       
[1,] 100 100 . .   .   .
[2,]   .   . . .   . 100
[3,]   . 100 . . 100   .
[4,]   . 100 . .   .   .
[1] "Division"
4 x 6 sparse Matrix of class "dgCMatrix"
               
[1,] 1 1 . . . .
[2,] . . . . . 1
[3,] . 1 . . 1 .
[4,] . 1 . . . .

矩阵乘法

矩阵可以相互相乘，无论稀疏还是密集。但是，第一个矩阵的列数应该等于第二个矩阵的行数。

R

# Attach the Matrix package
library(Matrix)

# Construct a matrix with values
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
rows <- 4L
cols <- 6L
vals <- sample(
  x = c(0, 10),
  prob = c(0.85, 0.15),
  size = rows * cols,
  replace = TRUE
)
dense_mat <- matrix(vals, nrow = rows)

# Convert to sparse
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Transpose of the sparse matrix (cols x rows)
transpose_mat <- t(sparse_mat)

# Matrix product of the matrix and its transpose; the column count of
# the left operand equals the row count of the right operand.
mul_mat <- sparse_mat %*% transpose_mat
print("Multiplication of Matrices")
print(mul_mat)

输出：

[1] "Sparse Matrix"
4 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 10 10 . .  .  .
[2,]  .  . . .  . 10
[3,]  . 10 . . 10  .
[4,]  . 10 . .  .  .
[1] "Multiplication of Matrices"
4 x 4 sparse Matrix of class "dgCMatrix"
                   
[1,] 200   . 100 100
[2,]   . 100   .   .
[3,] 100   . 200 100
[4,] 100   . 100 100

乘以向量

矩阵可以与一维向量按元素相乘，以转换数据。向量的元素按列优先的顺序与矩阵元素逐一对应，长度不足时循环使用；当向量长度能整除矩阵行数时，效果相当于每一行乘以向量中对应的元素。例如，对 4 行的矩阵和向量 c(3, 2)，第 1、3 行乘以 3，第 2、4 行乘以 2。

R

# Attach the Matrix package
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
vals <- sample(c(0, 10), size = rows * cols, replace = TRUE,
               prob = c(0.85, 0.15))
dense_mat <- array(vals, dim = c(rows, cols))

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Element-wise product with a vector. The vector is recycled down the
# columns, so with 4 rows and length 2, rows 1 and 3 are scaled by 3
# and rows 2 and 4 are scaled by 2.
vec <- c(3, 2)
print("Multiplication by vector")
print(sparse_mat * vec)

输出：

[1] "Sparse Matrix"
4 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 10 10 . .  .  .
[2,]  .  . . .  . 10
[3,]  . 10 . . 10  .
[4,]  . 10 . .  .  .
[1] "Multiplication by vector"
4 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 30 30 . .  .  .
[2,]  .  . . .  . 20
[3,]  . 30 . . 30  .
[4,]  . 20 . .  .  .

矩阵组合

可以使用列绑定 cbind() 或行绑定 rbind() 操作将矩阵与向量或其他矩阵组合。使用 rbind() 时，结果矩阵的行数是各输入矩阵行数之和；使用 cbind() 时，结果矩阵的列数是各输入矩阵列数之和。

R

# Attach the Matrix package
library(Matrix)

# Dimensions of the example matrix
rows <- 4L
cols <- 6L

# Draw rows * cols cell values reproducibly:
#    0 with probability 0.85
#   10 with probability 0.15
set.seed(0)
vals <- sample(c(0, 10), size = rows * cols, replace = TRUE,
               prob = c(0.85, 0.15))
dense_mat <- array(vals, dim = c(rows, cols))

# Coerce the dense matrix to a sparse representation
sparse_mat <- as(dense_mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

# Stack the sparse matrix on top of itself (row-wise combination)
row_bind <- rbind(sparse_mat, sparse_mat)

# Show the stacked result
print("Row Bind")
print(row_bind)

输出：

[1] "Sparse Matrix"
4 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 10 10 . .  .  .
[2,]  .  . . .  . 10
[3,]  . 10 . . 10  .
[4,]  . 10 . .  .  .
[1] "Row Bind"
8 x 6 sparse Matrix of class "dgCMatrix"
                   
[1,] 10 10 . .  .  .
[2,]  .  . . .  . 10
[3,]  . 10 . . 10  .
[4,]  . 10 . .  .  .
[5,] 10 10 . .  .  .
[6,]  .  . . .  . 10
[7,]  . 10 . . 10  .
[8,]  . 10 . .  .  .

稀疏矩阵的性质

NA值
NA 值不被视为等同于稀疏性，因此被视为非零值。但是，它们不参与任何稀疏矩阵运算。

R

# Attach the Matrix package
library(Matrix)

# Build the 3 x 2 source matrix one column at a time;
# the last row holds NA in both columns.
mat <- cbind(c(5.5, 0, NA), c(0, 0, NA))
print("Original Matrix")
print(mat)

# Coerce to a sparse matrix and show how the NA entries are kept
sparse_mat <- as(mat, "sparseMatrix")
print("Sparse Matrix")
print(sparse_mat)

输出：

[1] "Original Matrix"
    [,1] [,2]
[1,]  5.5    0
[2,]  0.0    0
[3,]   NA   NA
[1] "Sparse Matrix"
3 x 2 sparse Matrix of class "dgCMatrix"
         
[1,] 5.5  .
[2,] .    .
[3,]  NA NA

稀疏矩阵数据可以写入 Matrix Market 格式（.mtx）的普通文件中。可以使用 writeMM() 函数将稀疏矩阵的数据写入文件。

# Write a sparse matrix to a Matrix Market (.mtx) file.
# `obj` is the sparse matrix to export (for example, a sparse_mat
# built as in the snippets above); `file` is the output path.
writeMM(obj = sparse_mat, file = "fname.mtx")