# Last modified: 2022-03-11 14:46:53.975000 | Author: Mango
# Removing the outliers
def removeOutliers(data, col, factor=1.5):
    """Drop the rows of ``data`` whose ``col`` value lies outside the IQR fences.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where Q1 and
    Q3 are the 0.25 and 0.75 quantiles of ``data[col]`` and ``IQR = Q3 - Q1``.
    A value lying exactly on a fence is kept, so a column whose IQR is 0 keeps
    the rows equal to its quartile value instead of losing every row.

    The result is published through two module-level globals that the calling
    script reads: ``outlier_free_list`` (the kept values of ``col``) and
    ``filtered_data`` (the kept rows). The kept rows are also returned.

    Args:
        data: pandas DataFrame; accessed through ``data[col]`` and ``data.loc``.
        col: Label of the column to examine.
        factor: Multiplier applied to the IQR to place the fences.

    Returns:
        The rows of ``data`` whose ``col`` value is within the fences.
    """
    global outlier_free_list
    global filtered_data

    values = data[col]
    if len(values) == 0:
        # Quantiles of an empty column are undefined; with no rows there is
        # nothing to filter, so publish an empty result and stop here.
        outlier_free_list = []
        filtered_data = data.copy()
        return filtered_data

    q1, q3 = np.quantile(values, [0.25, 0.75])
    iqr = q3 - q1
    print("IQR value for column %s is: %s" % (col, iqr))

    lower_range = q1 - factor * iqr
    upper_range = q3 + factor * iqr

    # Inclusive comparison: only values strictly beyond a fence are outliers.
    # NaN compares False on both sides, so rows with NaN in `col` are dropped.
    within_fences = (values >= lower_range) & (values <= upper_range)

    outlier_free_list = values[within_fences].tolist()
    filtered_data = data.loc[within_fences]
    return filtered_data
# Filter column by column: the first column is checked against the raw frame,
# every later column against the rows that survived the previous passes.
# removeOutliers publishes its result in the global `filtered_data`.
for column_name in data.columns:
    is_first_column = column_name == data.columns[0]
    source_frame = data if is_first_column else filtered_data
    removeOutliers(source_frame, column_name)

# Rebind the original variable to the fully filtered frame.
data = filtered_data
print("Shape of data after outlier removal is: ", data.shape)