repl.it
@tsimpson1379/

Substitution cipher

Python

No description

fork
loading
Files
  • main.py
  • english_quadgrams.txt
  • english_quintgrams.txt
  • english_trigrams.txt
  • imaginativeFileName.py
  • input.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from imaginativeFileName import *


# Per-letter scores for English, keyed by alphabet index (0 = A ... 25 = Z);
# the values look like percentage frequencies (E = 12.0, T = 9.1, ...).
# Not referenced again in the visible part of this script.
S = {4: 12.0, 19: 9.1, 0: 8.12, 14: 7.68, 8: 7.31, 13: 6.95, 18: 6.28, 17: 6.02, 7: 5.92, 3: 4.32, 11: 3.98, 20: 2.88, 2: 2.71, 12: 2.61, 5: 2.3, 24: 2.11, 22: 2.09, 6: 2.03, 15: 1.82, 1: 1.49, 21: 1.11, 10: 0.69, 23: 0.17, 16: 0.11, 9: 0.1, 25: 0.07} # English character scores
a = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' # Alphabet

# Substitutions made so far: ciphertext letter -> plaintext letter.
keys = {}

# Load the ciphertext, upper-case it and drop every character that is not
# A-Z. The context manager closes the file handle as soon as it has been
# read instead of leaving that to the garbage collector.
with open('input.txt') as inputFile:
    origMessage = ''.join(c for c in inputFile.read().upper() if c in a)
# `m` is the working copy that gets progressively decrypted; `origMessage`
# keeps the untouched ciphertext.
m = origMessage

# Frequency tables for single letters, bigrams and trigrams.
# NOTE(review): analyse() is not defined in this file (presumably it comes
# from the star import); the code further down reads entry [0][0] as the
# most common n-gram, so the result is assumed to be sorted most-frequent
# first -- confirm against its definition.
freqs = analyse(m)
bigramFreqs = analyse(m, 2)
trigramFreqs = analyse(m, 3)

# Guess that the top-ranked trigram is 'the'. The guess is only accepted when
# the trigram's last letter is also the top-ranked single letter (presumably
# the stand-in for 'e').
candidate = trigramFreqs[0][0]
if candidate[2] == freqs[0][0]:
    # Pair each ciphertext letter of the trigram with its plaintext letter,
    # substitute it throughout the message and remember the mapping.
    for cipherChar, plainChar in zip(candidate, 'the'):
        m = m.replace(cipherChar, plainChar)
        keys[cipherChar] = plainChar
    print("Replaced most common trigram with 'the'")

# Disabled experiment, kept inside a bare string literal so it never runs:
# it would replace the letter found at freqs[2] (the third entry of the
# single-letter table) with 'a' throughout the message.
'''
aC = freqs[2][0]
m = m.replace(aC, 'a')
print('Replaced '+aC+' with a')
'''

# Try the patterns 'th?t' and 'th?s', proposing 'a' and 'i' respectively for
# the unknown slot. find() receives the message, the pattern, the proposed
# letter and the key map, and hands back the updated message and key map.
# NOTE(review): find() is not defined in this file (presumably it comes from
# the star import) -- check its definition for the exact matching rules.
for pattern, letter in (('th?t', 'a'), ('th?s', 'i')):
    m, keys = find(m, pattern, letter, keys)
print(m)

# Disabled single-pass draft of the n-gram matching loop, kept inside a bare
# string literal so it never runs. Unlike the live loop that follows, it does
# not record substitutions in `keys` and does not repeat until nothing changes.
'''
for g in range(3, 6):
    print('Testing ngrams, n='+str(g))
    for n in range(len(m)-g+1):
        q = m[n:n+g]
        # Replaces uppercase chars with ?
        for char in q:
            if char.isupper():
                q = q.replace(char, '?')
        if q.count('?') == 1:
            possibilities = fnmatch.filter(grams[g], q.upper())
            if len(possibilities) == 1:
                x = q.index('?')
                c = m[n+x]
                t = possibilities[0][x].lower()
                m = m.replace(c, t)
                print(q)
                print('REPLACED: '+c+' '+t)
'''

# Iterative n-gram solver.
#
# In `m`, upper-case letters are still-unsolved ciphertext and lower-case
# letters are plaintext that has already been substituted. Each pass slides a
# window of 3, 4 and 5 characters over the message; when a window has exactly
# one unsolved letter and exactly one entry of grams[g] fits it, that letter
# is substituted everywhere and recorded in `keys`. Passes repeat until a
# whole pass makes no substitution.
#
# NOTE(review): `grams` and `fnmatch` are not defined in this file; they are
# presumably provided by the star import. grams[g] is assumed to hold the
# known g-letter n-grams in upper case -- confirm against its definition.
changed = True
# Dedicated pass counter, so the alphabet constant `a` is not overwritten.
testNo = 1
while changed:
    print('RUNNING TEST NO '+str(testNo))
    changed = False
    for g in range(3, 6):
        print('Testing ngrams, n='+str(g))
        r = len(m)-g+1
        for n in range(r):
            # Progress indicator every 100 windows.
            if n % 100 == 0:
                print(str(n) + ' / ' + str(r))
            # Mask every unsolved (upper-case) letter with '?', which fnmatch
            # treats as "any single character".
            q = ''.join('?' if char.isupper() else char for char in m[n:n+g])
            # Only windows with a single unknown are constrained enough.
            if q.count('?') != 1:
                continue
            possibilities = fnmatch.filter(grams[g], q.upper())
            # Act only on an unambiguous match.
            if len(possibilities) != 1:
                continue
            x = q.index('?')
            c = m[n+x]                       # ciphertext letter being solved
            t = possibilities[0][x].lower()  # plaintext letter it must be
            # A substitution key is one-to-one: skip the guess if this
            # ciphertext letter already has a mapping, or if the plaintext
            # letter has already been assigned to another ciphertext letter.
            if c in keys or t in keys.values():
                continue
            m = m.replace(c, t)
            keys[c] = t
            print(q)
            print('REPLACED: '+c+' '+t)
            changed = True
    testNo += 1

print('Finished ngram tests because nothing changed')
print(m)

print(keys)
?