@candh/

# k-means clustering

Files
• main.py
• Packager files
• poetry.lock
• pyproject.toml
• requirements.txt
main.py
```1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
```
```python
import statistics
from tabulate import tabulate
from collections import defaultdict

def kmeans_table(arr, centers, k):
    """Run one k-means assignment/update step on 1-D data and print the table.

    Each value in ``arr`` is assigned to the nearest of the first ``k``
    ``centers`` (absolute difference; ties go to the lowest-numbered
    center). The per-point working table is printed with ``tabulate`` and
    the mean of every cluster is returned as the next set of centers.

    Args:
        arr: Sequence of numeric data points.
        centers: Sequence holding at least ``k`` current cluster centers.
        k: Number of clusters.

    Returns:
        A list of ``k`` new centers ordered by cluster index. A cluster that
        received no points keeps its previous center, so the result can
        always be fed straight back in as ``centers``.
    """
    # distances[j][i] is the distance from point i to center j.
    distances = [[abs(x - centers[j]) for x in arr] for j in range(k)]

    # Transpose so each tuple holds one point's distance to every center,
    # then take the position of the smallest one. ``min(col)`` (rather than
    # ``min(*col)``) also works when there is only a single center.
    mins = [col.index(min(col)) for col in zip(*distances)]

    # Group the points by the (stringified) index of their nearest center.
    clusters = defaultdict(list)
    for x, nearest in zip(arr, mins):
        clusters[str(nearest)].append(x)

    # Walk every cluster index, not just the number of distinct labels seen:
    # the occupied clusters are not guaranteed to be the first ones.
    means = []
    for j in range(k):
        members = clusters.get(str(j))
        # An empty cluster has no mean; fall back to its previous center.
        means.append(statistics.mean(members) if members else centers[j])

    # One header per printed column: point, k centers, k distances, label.
    headers = ["xi"]
    headers += ["cj"] * k
    headers += ["dj"] * k
    headers += ["cluster"]

    rows = [
        [x, *centers[:k], *(distances[j][i] for j in range(k)), mins[i] + 1]
        for i, x in enumerate(arr)
    ]
    print(tabulate(rows, headers=headers))
    print("New Centeriods / Means are:", means,
          "in order (1st cluster to N cluster)")
    print("Clusters", clusters)
    return means

# arr = [15, 15, 16, 19, 19, 20, 20, 21, 22, 28, 35, 40, 41, 42, 43, 44, 60, 61, 65]
# k = 2
# centers = [16, 22]

arr = [15, 16, 19, 19, 20, 21, 22, 28, 35, 40, 42, 44, 60, 61, 65]
k = 3
centers = [44, 20, 28]

for i in range(4):
print("Iteration: {}".format(i + 1))
centers = kmeans_table(arr, centers, k)
print("\n")```
Fetching token