repl.it
@candh/

k-means clustering

Python

https://www.saedsayad.com/clustering_kmeans.htm

fork
loading
Files
  • main.py
  • Packager files
  • poetry.lock
  • pyproject.toml
  • requirements.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import statistics
from tabulate import tabulate
from collections import defaultdict


def kmeans_table(arr, centers, k):
    """Run one k-means pass over 1-D points, print the working, return new centers.

    Each point is assigned to the nearest of the first ``k`` centers
    (absolute difference; ties go to the lowest-numbered center).  A table
    of points, centers, distances and the chosen cluster is printed,
    followed by the new means and the cluster membership.

    Args:
        arr: Sequence of numeric points.
        centers: Current cluster centers; must hold at least ``k`` values.
        k: Number of clusters.

    Returns:
        A list of ``k`` new centers, in cluster order.  A cluster that
        received no points keeps its previous center, so the result can be
        passed straight back in as ``centers`` for the next pass.
    """
    # distances[j][i] is the absolute distance from point i to center j.
    distances = [[abs(point - centers[j]) for point in arr] for j in range(k)]

    # Transpose to one column of k distances per point and pick the closest
    # center.  min(column), not min(*column): unpacking a single-element
    # column (k == 1) would hand min() a bare number and raise TypeError.
    mins = [column.index(min(column)) for column in zip(*distances)]

    clusters = defaultdict(list)
    for point, nearest in zip(arr, mins):
        clusters[str(nearest)].append(point)

    # Walk every cluster index rather than only the number of distinct
    # assignments: an empty cluster would otherwise shift or truncate the
    # result (or call mean() on an empty list).  .get() avoids inserting
    # empty entries into the defaultdict that is printed below.
    means = []
    for j in range(k):
        members = clusters.get(str(j))
        means.append(statistics.mean(members) if members else centers[j])

    headers = ["Point"]
    headers += ["cj"] * k
    headers += ["dj"] * k
    headers.append("Nearest Cluster")

    # One row per point: the point, every center, its distance to each
    # center, and the 1-based number of the nearest cluster.
    rows = [
        [point, *centers, *(per_center[i] for per_center in distances),
         nearest + 1]
        for i, (point, nearest) in enumerate(zip(arr, mins))
    ]
    print(tabulate(rows, headers=headers))
    print("New Centeriods / Means are:", means,
          "in order (1st cluster to N cluster)")
    print("Clusters", clusters)
    return means


# arr = [15, 15, 16, 19, 19, 20, 20, 21, 22, 28, 35, 40, 41, 42, 43, 44, 60, 61, 65]
# k = 2
# centers = [16, 22]


# Demo input: fifteen 1-D points and three hand-picked starting centers.
arr = [15, 16, 19, 19, 20, 21, 22, 28, 35, 40, 42, 44, 60, 61, 65]
centers = [44, 20, 28]
k = 3

# Four refinement passes; the means returned by each pass become the
# centers for the next one.
for i in range(4):
    print("Iteration: {}".format(i + 1))
    centers = kmeans_table(arr, centers, k)
    print("\n")
Fetching token
?