1234567891011121314151617181920212223242526272829 |
- # https://sky.pro/media/kak-ispolzovat-python-dlya-raboty-s-klasterizacziej-dannyh/
- import pandas as pd
- from sklearn.datasets import load_iris
- from sklearn.cluster import KMeans
- import seaborn as sns
- from matplotlib import pyplot as plt
-
# K-means demo on the Iris dataset: fit three clusters on all four features,
# then plot two of the features coloured by cluster label, together with the
# cluster centroids.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)
print(data.head())

kmeans = KMeans(n_clusters=3)
kmeans.fit(data)
# Centroids of the three clusters: one row per cluster, one column per
# feature the model was fitted on.
print(kmeans.cluster_centers_)
# Attach the labels only after fitting, so the model never sees this column.
data['cluster'] = kmeans.labels_
print(data.head())

# Positions (within iris.feature_names) of the features shown on the x and y
# axes. Change both values here to plot a different pair, e.g. 0 and 2 for
# 'sepal length (cm)' against 'petal length (cm)'.
x_col, y_col = 1, 3
sns.scatterplot(
    x=iris.feature_names[x_col],
    y=iris.feature_names[y_col],
    hue='cluster',
    data=data,
    palette='viridis',
)

# Draw each centroid using the SAME two feature columns as the scatter plot;
# picking columns 0 and 1 here would place the markers in a different
# coordinate space from the points. `None` keeps matplotlib's default colour
# for cluster 0.
for cluster_id, marker_color in enumerate((None, 'red', 'violet')):
    centroid = kmeans.cluster_centers_[cluster_id]
    plt.scatter(
        centroid[x_col],
        centroid[y_col],
        label=f'центр тяжести кластерa {cluster_id}',
        color=marker_color,
    )
plt.grid()
plt.legend()
plt.show()
|