📜  Python|使用 Sklearn 创建测试数据集

📅  最后修改于: 2022-05-13 01:58:07.906000             🧑  作者: Mango

Python|使用 Sklearn 创建测试数据集

Python 的 Sklearn 库提供了一个很棒的示例数据集生成器,它将帮助您创建自己的自定义数据集。它快速且易于使用。以下是它提供的样本类型。
对于上述所有方法,您需要导入sklearn.datasets.samples_generator

Python3
# importing libraries
from sklearn.datasets import make_blobs
 
# matplotlib for ploting
from matplotlib import pyplot as plt
from matplotlib import style


Python3
# Creating Test DataSets using sklearn.datasets.make_blobs
from sklearn.datasets import make_blobs
from matplotlib import pyplot as plt
from matplotlib import style
 
style.use("fivethirtyeight")
 
X, y = make_blobs(n_samples = 100, centers = 3,
               cluster_std = 1, n_features = 2)
 
plt.scatter(X[:, 0], X[:, 1], s = 40, color = 'g')
plt.xlabel("X")
plt.ylabel("Y")
 
plt.show()
plt.clf()


Python3
# Creating Test DataSets using sklearn.datasets.make_moon
from sklearn.datasets import make_moon
from matplotlib import pyplot as plt
from matplotlib import style
 
X, y = make_moons(n_samples = 1000, noise = 0.1)
plt.scatter(X[:, 0], X[:, 1], s = 40, color ='g')
plt.xlabel("X")
plt.ylabel("Y")
 
plt.show()
plt.clf()


Python3
# Creating Test DataSets using sklearn.datasets.make_circles
from sklearn.datasets import make_circles
from matplotlib import pyplot as plt
from matplotlib import style
 
style.use("fivethirtyeight")
 
X, y = make_circles(n_samples = 100, noise = 0.02)
plt.scatter(X[:, 0], X[:, 1], s = 40, color ='g')
plt.xlabel("X")
plt.ylabel("Y")
 
plt.show()
plt.clf()


sklearn.datasets.make_blobs

蟒蛇3

# Creating Test DataSets using sklearn.datasets.make_blobs
from sklearn.datasets import make_blobs
from matplotlib import pyplot as plt
from matplotlib import style
 
style.use("fivethirtyeight")
 
X, y = make_blobs(n_samples = 100, centers = 3,
               cluster_std = 1, n_features = 2)
 
plt.scatter(X[:, 0], X[:, 1], s = 40, color = 'g')
plt.xlabel("X")
plt.ylabel("Y")
 
plt.show()
plt.clf()

输出:

make_blob 有 3 个中心



sklearn.datasets.make_moon

蟒蛇3

# Creating Test DataSets using sklearn.datasets.make_moon
from sklearn.datasets import make_moon
from matplotlib import pyplot as plt
from matplotlib import style
 
X, y = make_moons(n_samples = 1000, noise = 0.1)
plt.scatter(X[:, 0], X[:, 1], s = 40, color ='g')
plt.xlabel("X")
plt.ylabel("Y")
 
plt.show()
plt.clf()

输出:

具有 1000 个数据点的 make_moons

sklearn.datasets.make_circle

蟒蛇3

# Creating Test DataSets using sklearn.datasets.make_circles
from sklearn.datasets import make_circles
from matplotlib import pyplot as plt
from matplotlib import style
 
style.use("fivethirtyeight")
 
X, y = make_circles(n_samples = 100, noise = 0.02)
plt.scatter(X[:, 0], X[:, 1], s = 40, color ='g')
plt.xlabel("X")
plt.ylabel("Y")
 
plt.show()
plt.clf()

输出:

用 100 个数据点制作 _circle