📌 相关文章

📜 使用 Dplyr 删除 R 中的重复行

📅 最后修改于: 2022-05-13 01:54:30.782000 🧑 作者: Mango

使用 Dplyr 删除 R 中的重复行

在本文中，我们将使用 Dplyr 包删除 R 编程语言中的重复行。

方法一：distinct()

此函数用于删除数据框中的重复行并获取唯一数据

句法：

distinct(dataframe)

编程需要懂一点英语

我们还可以根据数据框中的多列/变量删除重复的行

句法：

distinct(dataframe,column1,column2,.,column n)

编程需要懂一点英语

使用中的数据集：

示例 1：从数据框中删除重复行的 R 程序

R

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows
print(distinct(data1))

R

# load the package
library(dplyr)
  
# create dataframe with three columns 
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows based on name 
# column
print(distinct(data1,name))

R

# load the package
library(dplyr)
  
# create dataframe with three columns 
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows based on 
# name and address columns
print(distinct(data1,address,name))

R

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows using duplicated()
# function based on name column
print(data1[!duplicated(data1$name), ] )
print("=====================")
  
# remove duplicate rows using duplicated()
# function based on id column
print(data1[!duplicated(data1$id), ] )
print("=====================")
  
# remove duplicate rows using duplicated()
# function based on address column
print(data1[!duplicated(data1$address), ] )
print("=====================")

R

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# get unique data from the dataframe
print(unique(data1))

R

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# get unique data from the dataframe
# in id column
print(unique(data1$id))
  
# get unique data from the dataframe 
# in name  column
print(unique(data1$name))
  
# get unique data from the dataframe 
# in address column
print(unique(data1$address))

输出：

示例 2：基于单列删除重复行

电阻

# load the package
library(dplyr)
  
# create dataframe with three columns 
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows based on name 
# column
print(distinct(data1,name))

输出：

示例 3：删除基于多列的重复行

电阻

# load the package
library(dplyr)
  
# create dataframe with three columns 
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows based on 
# name and address columns
print(distinct(data1,address,name))

输出：

方法二：使用duplicated()函数

duplicated()函数将返回重复的行，!duplicated()函数将返回唯一的行。

句法：

dataframe[!duplicated(dataframe$column_name), ]

编程需要懂一点英语

这里，dataframe 是输入数据帧，column_name 是数据帧中的列，根据该列删除重复数据。

示例：基于特定列删除重复数据的 R 程序

电阻

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# remove duplicate rows using duplicated()
# function based on name column
print(data1[!duplicated(data1$name), ] )
print("=====================")
  
# remove duplicate rows using duplicated()
# function based on id column
print(data1[!duplicated(data1$id), ] )
print("=====================")
  
# remove duplicate rows using duplicated()
# function based on address column
print(data1[!duplicated(data1$address), ] )
print("=====================")

输出：

方法 3：使用 unique()函数

unique()函数用于通过返回唯一数据来删除重复行

句法：

unique(dataframe)

编程需要懂一点英语

要从列中获取唯一数据，请传递列的名称以及数据框的名称，

句法：

unique(dataframe$column_name)

编程需要懂一点英语

其中，dataframe 是输入数据帧，column_name 是数据帧中的列。

示例 1：使用 unique()函数删除重复项的 R 程序

电阻

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# get unique data from the dataframe
print(unique(data1))

输出：

示例 2：用于删除特定列中重复项的 R 程序

电阻

# load the package
library(dplyr)
  
# create dataframe with three columns
# named id,name and address
data1=data.frame(id=c(1,2,3,4,5,6,7,1,4,2),
                   
                 name=c('sravan','ojaswi','bobby',
                        'gnanesh','rohith','pinkey',
                        'dhanush','sravan','gnanesh',
                        'ojaswi'),
                   
                 address=c('hyd','hyd','ponnur','tenali',
                           'vijayawada','vijayawada','guntur',
                           'hyd','tenali','hyd'))
  
# get unique data from the dataframe
# in id column
print(unique(data1$id))
  
# get unique data from the dataframe 
# in name  column
print(unique(data1$name))
  
# get unique data from the dataframe 
# in address column
print(unique(data1$address))

输出：