# irisclusters.py
# Source: https://sky.pro/media/kak-ispolzovat-python-dlya-raboty-s-klasterizacziej-dannyh/
"""Cluster the Iris dataset with k-means and plot the clusters and centroids.

The script fits a 3-cluster k-means model on all four Iris features, prints
the first rows of the data and the centroids, and then shows a 2-D scatter
plot of two selected features coloured by cluster, with each cluster's
centroid marked in the same two-feature plane.
"""
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
import seaborn as sns
from matplotlib import pyplot as plt

# Indices into iris.feature_names of the two features shown on the plot.
# In scikit-learn's Iris data the order is: sepal length, sepal width,
# petal length, petal width -- so 1/3 plots sepal width vs. petal width.
# Change these (e.g. to 0 and 2 for sepal length vs. petal length) to look at
# another projection; the centroids follow automatically.
X_FEATURE = 1
Y_FEATURE = 3

iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)
print(data.head())

# random_state makes the cluster numbering (and therefore the colours and
# legend labels) reproducible between runs; n_init is set explicitly because
# its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(data)

# Centroids of the three clusters: one row per cluster, one column per
# feature, in the same column order as `data` at fit time.
centers = kmeans.cluster_centers_
print(centers)

# Added only after fitting, so the label column is not used as a feature.
data['cluster'] = kmeans.labels_
print(data.head())

sns.scatterplot(
    x=iris.feature_names[X_FEATURE],
    y=iris.feature_names[Y_FEATURE],
    hue='cluster',
    data=data,
    palette='viridis',
)

# Draw every centroid using the SAME two feature columns as the scatter plot
# axes; taking columns 0 and 1 here would place the markers in a different
# coordinate plane from the points.
# None keeps matplotlib's default colour for the first centroid.
centroid_colors = [None, 'red', 'violet']
for cluster_id, (center, color) in enumerate(zip(centers, centroid_colors)):
    plt.scatter(
        center[X_FEATURE],
        center[Y_FEATURE],
        label=f'центр тяжести кластерa {cluster_id}',
        color=color,
    )

plt.grid()
plt.legend()
plt.show()