from numpy import array
from pickle import dump
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.callbacks import CSVLogger

# load doc into memory
def load_doc(filename):
    # open the file as read only
    file = open(filename, 'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text

# load the prepared character sequences
in_filename = 'char_sequences.txt'
raw_text = load_doc(in_filename)
lines = raw_text.split('\n')

# integer encode sequences of characters
chars = sorted(list(set(raw_text)))
mapping = dict((c, i) for i, c in enumerate(chars))
sequences = list()
for line in lines:
    # integer encode line
    encoded_seq = [mapping[char] for char in line]
    # store
    sequences.append(encoded_seq)

# vocabulary size
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)

# separate into input and output
sequences = array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]
# one hot encode the input and output characters
sequences = [to_categorical(x, num_classes=vocab_size) for x in X]
X = array(sequences)
y = to_categorical(y, num_classes=vocab_size)

# define model: three stacked LSTM layers followed by a softmax output
model = Sequential()
model.add(LSTM(250, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(LSTM(250, return_sequences=True))
model.add(LSTM(250))
model.add(Dense(vocab_size, activation='softmax'))
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# print(model.summary())
# fit model, logging per-epoch metrics to log.csv
csv_logger = CSVLogger('log.csv', append=True, separator=';')
model.fit(X, y, epochs=30, verbose=2, callbacks=[csv_logger])

# save the model to file
model.save('model.h5')
# save the mapping
dump(mapping, open('mapping.pkl', 'wb'))
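
# --- minimal follow-up sketch (not part of the training script above) ---
# Shows how the saved 'model.h5' and 'mapping.pkl' could be reloaded to
# generate text one character at a time. The seq_length value, the seed text,
# and the 20-character generation loop are illustrative assumptions; this
# would normally live in a separate script.
from pickle import load
from numpy import argmax
from keras.models import load_model

gen_model = load_model('model.h5')
gen_mapping = load(open('mapping.pkl', 'rb'))
seq_length = 10  # assumed: number of input characters per training sequence
in_text = 'Sing a son'  # assumed seed: exactly seq_length chars from the training vocabulary

for _ in range(20):
    # integer encode the most recent seq_length characters
    encoded = [gen_mapping[char] for char in in_text[-seq_length:]]
    # one hot encode to match the training representation
    encoded = to_categorical(encoded, num_classes=len(gen_mapping))
    encoded = encoded.reshape(1, seq_length, len(gen_mapping))
    # predict the next character index and map it back to a character
    yhat = argmax(gen_model.predict(encoded, verbose=0), axis=-1)[0]
    out_char = [c for c, i in gen_mapping.items() if i == yhat][0]
    in_text += out_char
print(in_text)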