📜  如何在 R 中拆分 DataFrame

📅  最后修改于: 2022-05-13 01:55:41.819000             🧑  作者: Mango

如何在 R 中拆分 DataFrame

在本文中,我们将讨论如何在 R 编程语言中拆分数据帧。

数据框可以按行或按列拆分出子集,所选的行或列既可以是连续的,也可以是随机的。数据框的行和列可以通过索引或名称来引用;要同时引用多行或多列,可以使用 R 基础包(base R)中的 c() 函数。

按行拆分数据帧

按行索引拆分数据帧

可以使用行和列名称和索引来引用数据框单元格。

句法:

data-frame[start-row-num:end-row-num,]

行号保留在最终输出数据帧中。



示例:按行拆分数据帧

R
# Build the sample data frame: three groups with two rows each.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3)
)

print("Original DataFrame")
print(data_frame1)

# Keep rows 1 through 4 (all columns); the original row numbers are preserved.
first_four <- 1:4
data_frame_mod <- data_frame1[first_four, ]

print("Modified DataFrame")
print(data_frame_mod)


R
# Build the sample data frame: three groups with two rows each.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3)
)

print("Original DataFrame")
print(data_frame1)

# Extract only the sixth row (all columns); its row number stays 6.
row_index <- 6
data_frame_mod <- data_frame1[row_index, ]
print("Modified DataFrame")
print(data_frame_mod)


R
# Build the sample data frame: three groups with two rows each,
# plus a letter label per row.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3),
  col4 = letters[1:6]
)

print("Original DataFrame")
print(data_frame1)

# Fix the RNG seed so the random selection is reproducible.
set.seed(99999)

# Draw one binomial(size = 2, prob = 0.5) value per row.
n_rows <- nrow(data_frame1)
draws <- rbinom(n_rows, size = 2, prob = 0.5)

# Keep only the rows whose draw equals 0.
keep <- draws == 0
data_frame_mod <- data_frame1[keep, ]

print("Modified DataFrame")
print(data_frame_mod)


R
# Build the sample data frame: three groups with two rows each,
# plus a letter label per row.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3),
  col4 = letters[1:6]
)

print("Original DataFrame")
print(data_frame1)

# Select columns by name: keep every row, but only col2 and col4.
wanted_cols <- c("col2", "col4")
data_frame_mod <- data_frame1[, wanted_cols]
print("Modified DataFrame")
print(data_frame_mod)


R
# Build the sample data frame: three groups with two rows each,
# plus a letter label per row.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3),
  col4 = letters[1:6]
)

print("Original DataFrame")
print(data_frame1)

# Select columns by position: columns 3 and 4 are the last two.
col_positions <- 3:4
data_frame_mod <- data_frame1[, col_positions]
print("Modified DataFrame")
print(data_frame_mod)


输出:

[1] "Original DataFrame"
col1 col2 col3 
1 Grp1    1    1 
2 Grp1    2    2 
3 Grp2    3    1 
4 Grp2    1    2 
5 Grp3    2    1 
6 Grp3    3    2 
[1] "Modified DataFrame" 
col1 col2 col3 
1 Grp1    1    1 
2 Grp1    2    2 
3 Grp2    3    1 
4 Grp2    1    2

示例:按行拆分数据帧

R

# Build the sample data frame: three groups with two rows each.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3)
)

print("Original DataFrame")
print(data_frame1)

# Extract only the sixth row (all columns); its row number stays 6.
row_index <- 6
data_frame_mod <- data_frame1[row_index, ]
print("Modified DataFrame")
print(data_frame_mod)

输出:

[1] "Original DataFrame"
col1 col2 col3 
1 Grp1    1    1 
2 Grp1    2    2 
3 Grp2    3    1 
4 Grp2    1    2 
5 Grp3    2    1 
6 Grp3    3    2 
[1] "Modified DataFrame" 
col1 col2 col3 
6 Grp3    3    2

随机拆分数据帧行

也可以随机抽取数据帧的行。先用 set.seed() 固定随机数种子,使结果可以重现;再用随机数生成函数(例如 rbinom())为每一行生成一个随机值;最后将这些随机值与某个给定值进行比较,提取满足条件的行。

示例:按行随机拆分数据帧



R

# Build the sample data frame: three groups with two rows each,
# plus a letter label per row.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3),
  col4 = letters[1:6]
)

print("Original DataFrame")
print(data_frame1)

# Fix the RNG seed so the random selection is reproducible.
set.seed(99999)

# Draw one binomial(size = 2, prob = 0.5) value per row.
n_rows <- nrow(data_frame1)
draws <- rbinom(n_rows, size = 2, prob = 0.5)

# Keep only the rows whose draw equals 0.
keep <- draws == 0
data_frame_mod <- data_frame1[keep, ]

print("Modified DataFrame")
print(data_frame_mod)

输出:

[1] "Original DataFrame" 
col1 col2 col3 col4
 1 Grp1    1    1    a 
2 Grp1    2    2    b 
3 Grp2    3    1    c 
4 Grp2    1    2    d 
5 Grp3    2    1    e 
6 Grp3    3    2    f 
[1] "Modified DataFrame" 
col1 col2 col3 col4
5 Grp3    2    1    e
6 Grp3    3    2    f

按列拆分数据框

按列名拆分数据框

也可以使用列名来引用数据框的列。可以用 c() 函数把多个列名(字符串)组合成一个向量来同时指定多列。所选的列可以是相邻的,也可以是不相邻的。

句法:

data-frame[,c("col1", "col2",...)]

示例:按列名拆分数据框

R

# Build the sample data frame: three groups with two rows each,
# plus a letter label per row.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3),
  col4 = letters[1:6]
)

print("Original DataFrame")
print(data_frame1)

# Select columns by name: keep every row, but only col2 and col4.
wanted_cols <- c("col2", "col4")
data_frame_mod <- data_frame1[, wanted_cols]
print("Modified DataFrame")
print(data_frame_mod)

输出:

[1] "Original DataFrame" 
col1 col2 col3 col4
 1 Grp1    1    1    a 
2 Grp1    2    2    b 
3 Grp2    3    1    c 
4 Grp2    1    2    d 
5 Grp3    2    1    e 
6 Grp3    3    2    f 
[1] "Modified DataFrame" 
col2 col4 
1    1    a 
2    2    b 
3    3    c 
4    1    d 
5    2    e 
6    3    f

按列索引拆分数据框

也可以使用列索引引用数据框。可以通过指定列位置从数据框中提取单个和多个列。

句法:

data-frame[,start-col-num:end-col-num]

示例:按列索引拆分数据框

R

# Build the sample data frame: three groups with two rows each,
# plus a letter label per row.
data_frame1 <- data.frame(
  col1 = rep(c("Grp1", "Grp2", "Grp3"), each = 2),
  col2 = rep(1:3, times = 2),
  col3 = rep(1:2, times = 3),
  col4 = letters[1:6]
)

print("Original DataFrame")
print(data_frame1)

# Select columns by position: columns 3 and 4 are the last two.
col_positions <- 3:4
data_frame_mod <- data_frame1[, col_positions]
print("Modified DataFrame")
print(data_frame_mod)

输出:

[1] "Original DataFrame" 
col1 col2 col3 col4
 1 Grp1    1    1    a 
2 Grp1    2    2    b 
3 Grp2    3    1    c 
4 Grp2    1    2    d 
5 Grp3    2    1    e 
6 Grp3    3    2    f 
[1] "Modified DataFrame" 
col3 col4 
1    1    a 
2    2    b 
3    1    c 
4    2    d 
5    1    e 
6    2    f