repl.it
@GarethDwyer1/

SparseMatrices

Python

No description

fork
loading
Files
  • main.py
  • sentences.txt
  • Packager files
  • poetry.lock
  • pyproject.toml
  • requirements.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer

# Build a TF-IDF vector for every sentence, then compare all sentence pairs
# at once with a single sparse matrix product.
vec = TfidfVectorizer()

# Each line of sentences.txt is split on tabs; only the first field is kept
# (whitespace-stripped) as the sentence text.
with open("sentences.txt") as f:
    sentences = [line.split("\t")[0].strip() for line in f]

vectors = vec.fit_transform(sentences)

# Entry (i, j) is the dot product of the TF-IDF rows for sentences i and j.
# `.dot` is used instead of `*` because `*` means matrix product only for
# scipy's legacy matrix classes and element-wise product for sparse arrays.
# NOTE(review): with TfidfVectorizer's default row normalisation this is
# presumably the cosine similarity -- confirm against the sklearn docs.
similarities = vectors.dot(vectors.T)

print(similarities.shape)

# Round only the explicitly stored entries to 2 decimals. Operating on
# `.data` keeps the matrix sparse and avoids handing a sparse object to
# np.around, which is not a documented use of that function.
similarities.data = np.around(similarities.data, 2)
print(type(similarities))

# MiB used by the stored values alone (index arrays are not counted).
print(similarities.data.nbytes/1024/1024)

# MiB the equivalent dense array would need. Derived from shape and dtype so
# the dense array never has to be allocated just to measure it.
print(similarities.shape[0] * similarities.shape[1] * similarities.dtype.itemsize/1024/1024)

# Zero the diagonal so a sentence is never reported as most similar to itself.
print("setting diagonal")
similarities.setdiag(0)
print("set diagonal, fiding max")

# NOTE: the "finding max" step announced above is not implemented. The
# original only held commented-out experiments here (argsort / argmax over
# the matrix and printing individual sentences).



f


Fetching token
?