repl.it
Python

BOT

fork
loading
Files
  • main.py
  • d.txt
  • data.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import re

import keras
import numpy as np
import tensorflow

def prepareData():
  """Load data.txt and build LSTM training data.

  Returns:
    x: int array of shape (samples, 49) -- the first 49 word ids of
       each 50-word sliding window.
    y: one-hot array of shape (samples, numWords) -- the 50th word of
       each window, i.e. the word to predict.
    numWords: vocabulary size + 1 (Tokenizer ids start at 1; index 0
       is reserved).
  """
  # Context manager guarantees the file is closed even if read fails.
  with open('data.txt', 'r') as f:
    text = f.read().lower()

  # Keep only letters, digits and spaces, then split into words.
  text = re.sub('[^a-zA-Z0-9 ]', '', text).split()

  # Sliding window: one training line per 50 consecutive words.
  lines = [' '.join(text[i:i + 50]) for i in range(len(text) - 50)]

  tokenizer = keras.preprocessing.text.Tokenizer()
  tokenizer.fit_on_texts(lines)
  # All lines are exactly 50 words, so this forms a (samples, 50) array.
  sequences = np.array(tokenizer.texts_to_sequences(lines))

  numWords = len(tokenizer.word_index) + 1

  # BUG FIX: the original did `sequences[:][:-1]` / `sequences[:][-1]`,
  # which copies the list and then drops the last *sequence* (for x) and
  # takes the last *sequence* (for y) -- not per-row word slicing. The
  # intent is: x = first 49 ids of every row, y = the 50th id of every
  # row, which requires 2-D NumPy indexing.
  x = sequences[:, :-1]
  y = keras.utils.to_categorical(sequences[:, -1], num_classes=numWords)

  return x, y, numWords


from keras.layers import Embedding,Dense,LSTM

def createModel(x, y, numWords, epochs=30, batch_size=128):
  """Build and train a two-layer LSTM next-word prediction model.

  Args:
    x: int array of shape (samples, seqLen) of word ids.
    y: one-hot labels of shape (samples, numWords).
    numWords: vocabulary size (embedding input dim and output units).
    epochs: number of training epochs (default matches the original
      hard-coded 30).
    batch_size: training batch size (default matches the original
      hard-coded 128).

  Returns:
    The trained model. (The original returned None, so the trained
    model was unreachable by callers.)
  """
  model = keras.models.Sequential()
  model.add(Embedding(numWords, 50, input_length=x.shape[1]))
  model.add(LSTM(100, return_sequences=True))
  model.add(LSTM(100))
  model.add(Dense(100, activation='relu'))
  model.add(Dense(numWords, activation='softmax'))

  model.compile(loss='categorical_crossentropy', optimizer='adam',
                metrics=['accuracy'])

  model.fit(x, y, batch_size=batch_size, epochs=epochs)

  print('done')
  return model


if __name__ == '__main__':
  # Guard so importing this module does not trigger data loading and
  # a 30-epoch training run as a side effect.
  x, y, numWords = prepareData()
  createModel(x, y, numWords)