Python|使用 Sklearn 创建测试数据集
Python 的 Sklearn 库提供了一个很棒的示例数据集生成器,它将帮助您创建自己的自定义数据集。它快速且易于使用。以下是它提供的样本类型。
对于上述所有方法,您需要从 sklearn.datasets 导入相应的生成函数(旧版本使用的 sklearn.datasets.samples_generator 模块已在 scikit-learn 0.24 中移除)。
Python3
# Import the libraries used by the examples
from sklearn.datasets import make_blobs
# matplotlib for plotting
from matplotlib import pyplot as plt
from matplotlib import style
Python3
# Generate a clustered test dataset with sklearn.datasets.make_blobs and plot it.
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn.datasets import make_blobs

# Apply the "fivethirtyeight" plot theme before anything is drawn.
style.use("fivethirtyeight")

# 100 two-feature samples spread around 3 centers, each with standard deviation 1.
X, y = make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1)

# Plot the first feature against the second; every point is drawn in green.
plt.scatter(X[:, 0], X[:, 1], s=40, color='g')
plt.xlabel("X")
plt.ylabel("Y")

# Display the figure, then clear it so a later plot starts from a clean canvas.
plt.show()
plt.clf()
Python3
# Creating Test DataSets using sklearn.datasets.make_moons
# NOTE: the generator is named make_moons (plural); importing "make_moon"
# raises ImportError because no such function exists in sklearn.datasets.
from sklearn.datasets import make_moons
from matplotlib import pyplot as plt
from matplotlib import style

# 1000 samples forming two interleaving half circles, with Gaussian noise
# (standard deviation 0.1) added to the points.
X, y = make_moons(n_samples=1000, noise=0.1)

# Plot the first feature against the second; every point is drawn in green.
plt.scatter(X[:, 0], X[:, 1], s=40, color='g')
plt.xlabel("X")
plt.ylabel("Y")

# Display the figure, then clear it so a later plot starts from a clean canvas.
plt.show()
plt.clf()
Python3
# Generate a concentric-circles test dataset with sklearn.datasets.make_circles
# and plot it.
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn.datasets import make_circles

# Apply the "fivethirtyeight" plot theme before anything is drawn.
style.use("fivethirtyeight")

# 100 samples with a noise setting of 0.02.
X, y = make_circles(noise=0.02, n_samples=100)

# Plot the first feature against the second; every point is drawn in green.
plt.scatter(X[:, 0], X[:, 1], s=40, color='g')
plt.xlabel("X")
plt.ylabel("Y")

# Display the figure, then clear it so a later plot starts from a clean canvas.
plt.show()
plt.clf()
sklearn.datasets.make_blobs
Python3
# Generate a clustered test dataset with sklearn.datasets.make_blobs and plot it.
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn.datasets import make_blobs

# Apply the "fivethirtyeight" plot theme before anything is drawn.
style.use("fivethirtyeight")

# 100 two-feature samples spread around 3 centers, each with standard deviation 1.
X, y = make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1)

# Plot the first feature against the second; every point is drawn in green.
plt.scatter(X[:, 0], X[:, 1], s=40, color='g')
plt.xlabel("X")
plt.ylabel("Y")

# Display the figure, then clear it so a later plot starts from a clean canvas.
plt.show()
plt.clf()
输出:

make_blobs 有 3 个中心
sklearn.datasets.make_moons
Python3
# Creating Test DataSets using sklearn.datasets.make_moons
# NOTE: the generator is named make_moons (plural); importing "make_moon"
# raises ImportError because no such function exists in sklearn.datasets.
from sklearn.datasets import make_moons
from matplotlib import pyplot as plt
from matplotlib import style

# 1000 samples forming two interleaving half circles, with Gaussian noise
# (standard deviation 0.1) added to the points.
X, y = make_moons(n_samples=1000, noise=0.1)

# Plot the first feature against the second; every point is drawn in green.
plt.scatter(X[:, 0], X[:, 1], s=40, color='g')
plt.xlabel("X")
plt.ylabel("Y")

# Display the figure, then clear it so a later plot starts from a clean canvas.
plt.show()
plt.clf()
输出:

具有 1000 个数据点的 make_moons
sklearn.datasets.make_circles
Python3
# Generate a concentric-circles test dataset with sklearn.datasets.make_circles
# and plot it.
from matplotlib import pyplot as plt
from matplotlib import style
from sklearn.datasets import make_circles

# Apply the "fivethirtyeight" plot theme before anything is drawn.
style.use("fivethirtyeight")

# 100 samples with a noise setting of 0.02.
X, y = make_circles(noise=0.02, n_samples=100)

# Plot the first feature against the second; every point is drawn in green.
plt.scatter(X[:, 0], X[:, 1], s=40, color='g')
plt.xlabel("X")
plt.ylabel("Y")

# Display the figure, then clear it so a later plot starts from a clean canvas.
plt.show()
plt.clf()
输出:

具有 100 个数据点的 make_circles