@heather/

jaro winkler distance

Python

No description

fork
loading
Files
  • main.py
  • foobar.txt

This Plugin Crashed!

Error: Error: must not create an existing file {"type":"CREATE_FILE","wid":"0.5527051101687965","path":"main.py","file":{"path":"main.py","content":{"asEncoding":{"base64":"aW1wb3J0IG1hdGgKCmRlZiBtYXRjaChzMSwgczIpOgogICAgc2V0X29mX21hdGNoZXMgPSBzZXQuaW50ZXJzZWN0aW9uKHNldChzMSksIHNldChzMikpCiAgICByZXR1cm4gc2V0X29mX21hdGNoZXMKCmRlZiB0ZWNobmljYWxfbWF0Y2goczEsIHMyKToKICAgIG1hdGNoZXMgPSBtYXRjaChzMSwgczIpCiAgICBtYXhfZGlzdGFuY2UgPSBtYXRoLmZsb29yKG1heChsZW4oczEpLCBsZW4oczIpLzIpKSAtIDEKICAgIHRydWVfbGlzdCA9IFtdCiAgICBmb3IgaSBpbiBtYXRjaGVzOgogICAgICAgIGRpc3RhbmNlID0gYWJzKHMxLmluZGV4KGkpIC0gczIuaW5kZXgoaSkpCiAgICAgICAgaWYgZGlzdGFuY2UgPD0gbWF4X2Rpc3RhbmNlOgogICAgICAgICAgICB0cnVlX2xpc3QuYXBwZW5kKGkpCiAgICByZXR1cm4gdHJ1ZV9saXN0CgpkZWYgZGlmZl9sZXR0ZXJzKHNlcTEsIHNlcTIpOgogICAgcmV0dXJuIHN1bSgxIGZvciBhLCBiIGluIHppcChzZXExLCBzZXEyKSBpZiBhICE9IGIpCgpkZWYgdHJhbnNwb3NpdGlvbnMoczEsIHMyKToKICAgIHQgPSBsaXN0KHRlY2huaWNhbF9tYXRjaChzMSwgczIpKQogICAgczFfbGlzdCA9IFtdCiAgICBzMl9saXN0ID0gW10KICAgIGZvciBpIGluIHMxOgogICAgICAgIGlmIGkgaW4gdDoKICAgICAgICAgICAgczFfbGlzdC5hcHBlbmQoaSkKICAgIGZvciBpIGluIHMyOgogICAgICAgIGlmIGkgaW4gdDoKICAgICAgICAgICAgczJfbGlzdC5hcHBlbmQoaSkKICAgIHMxID0gJycuam9pbihzMV9saXN0KQogICAgczIgPSAnJy5qb2luKHMyX2xpc3QpCiAgICByZXR1cm4gZGlmZl9sZXR0ZXJzKHMxLCBzMikKCmRlZiBqYXJvX3NpbWlsYXJpdHkoczEsIHMyKToKICAgIG1hdGNoZXMgPSBsZW4odGVjaG5pY2FsX21hdGNoKHMxLCBzMikpCiAgICBpZiBtYXRjaGVzID09IDA6CiAgICAgICAgcmV0dXJuIDAKICAgIGVsc2U6CiAgICAgICAgcmV0dXJuIDEvMyoobWF0Y2hlcy9sZW4oczEpICsgbWF0Y2hlcy9sZW4oczIpICsgKG1hdGNoZXMgKyB0cmFuc3Bvc2l0aW9ucyhzMSwgczIpKS9tYXRjaGVzKQoKbWF0Y2hfdGV4dCA9IG9wZW4oJ2Zvb2Jhci50eHQnLCAncicpLnJlYWQoKS5zcGxpdGxpbmVzKCkKcGF0dGVybiA9ICdoYXQnCmNvbnN0YW50ID0gLjUKCnJlc3VsdHMgPSBbXQpmb3IgaSBpbiBtYXRjaF90ZXh0OgogICAgaWYgamFyb19zaW1pbGFyaXR5KGksIHBhdHRlcm4pID4gY29uc3RhbnQ6CiAgICAgICAgcmVzdWx0cy5hcHBlbmQoaSkKCnByaW50KHJlc3VsdHMp"},"asBuffer":null},"loaded":true}}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import math

def match(s1, s2):
    """Return the set of elements that occur in both ``s1`` and ``s2``."""
    return set(s1) & set(s2)

def technical_match(s1, s2):
    """Return the distinct characters shared by ``s1`` and ``s2`` that lie close together.

    A character counts as a match when the positions of its first occurrence
    in each sequence differ by at most the Jaro match window,
    ``max(len(s1), len(s2)) // 2 - 1`` (never less than zero).

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        A list holding each matching character once, in order of its first
        appearance in ``s1``.
    """
    # The window is half of the LONGER length, minus one.  Halving only
    # len(s2) before taking the max made the window almost as wide as s1
    # itself.  Clamp at zero so one-character inputs can still match.
    max_distance = max(0, max(len(s1), len(s2)) // 2 - 1)

    # Index of the first occurrence of every element, in insertion order.
    first_in_s1 = {}
    for position, char in enumerate(s1):
        first_in_s1.setdefault(char, position)
    first_in_s2 = {}
    for position, char in enumerate(s2):
        first_in_s2.setdefault(char, position)

    # Walking s1's first occurrences (rather than an unordered set) keeps
    # the result order identical from run to run.
    true_list = []
    for char, position1 in first_in_s1.items():
        position2 = first_in_s2.get(char)
        if position2 is not None and abs(position1 - position2) <= max_distance:
            true_list.append(char)
    return true_list

def diff_letters(seq1, seq2):
    """Count the positions at which ``seq1`` and ``seq2`` hold different items.

    Only positions present in both sequences are compared; any surplus tail
    of the longer sequence is ignored.
    """
    mismatches = 0
    for left, right in zip(seq1, seq2):
        if left != right:
            mismatches += 1
    return mismatches

def transpositions(s1, s2):
    """Count differing positions between the matched characters of two strings.

    Each string is reduced to the characters reported by
    ``technical_match(s1, s2)``, keeping their original order, and the two
    reduced strings are then compared position by position with
    ``diff_letters``.
    """
    matched = technical_match(s1, s2)
    kept_from_s1 = ''.join(char for char in s1 if char in matched)
    kept_from_s2 = ''.join(char for char in s2 if char in matched)
    return diff_letters(kept_from_s1, kept_from_s2)

def jaro_similarity(s1, s2):
    """Return the Jaro similarity of ``s1`` and ``s2`` as a value in [0, 1].

    The score is ``(m/len(s1) + m/len(s2) + (m - t)/m) / 3`` where ``m`` is
    the number of matching characters and ``t`` is half the number of
    matched characters that appear in a different order in the two strings.
    Two characters match when they are equal and their positions differ by
    at most ``max(len(s1), len(s2)) // 2 - 1`` (never less than zero); each
    character of ``s2`` can be matched only once, so repeated characters are
    counted correctly.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        ``1.0`` for identical non-empty strings, ``0.0`` when either string
        is empty or no characters match, otherwise a float in between.
    """
    len1, len2 = len(s1), len(s2)
    if len1 == 0 or len2 == 0:
        return 0.0

    window = max(0, max(len1, len2) // 2 - 1)

    # Greedily pair each character of s1 with the first unused equal
    # character of s2 that lies inside the window.
    used_in_s2 = [False] * len2
    matched_s1 = []
    for i, char in enumerate(s1):
        start = max(0, i - window)
        stop = min(len2, i + window + 1)
        for j in range(start, stop):
            if not used_in_s2[j] and s2[j] == char:
                used_in_s2[j] = True
                matched_s1.append(char)
                break

    matches = len(matched_s1)
    if matches == 0:
        return 0.0

    # Matched characters of s2 in their own order; every out-of-order pair
    # shows up as two differing positions, hence the halving.
    matched_s2 = [char for char, used in zip(s2, used_in_s2) if used]
    out_of_order = sum(1 for a, b in zip(matched_s1, matched_s2) if a != b)
    half_swaps = out_of_order // 2

    # Transpositions REDUCE similarity: the third term is (m - t) / m.
    return (matches / len1 + matches / len2 + (matches - half_swaps) / matches) / 3

# Load the candidate strings, one per line.  The context manager closes the
# file as soon as its contents have been read.
with open('foobar.txt', 'r') as handle:
    match_text = handle.read().splitlines()

pattern = 'hat'
# Minimum similarity score a line needs in order to be reported.
constant = .5

# Keep every line whose similarity to the pattern is above the threshold.
results = [line for line in match_text if jaro_similarity(line, pattern) > constant]

print(results)