56 lines
1.3 KiB
Python
56 lines
1.3 KiB
Python
|
from numpy import array
|
||
|
from keras.utils import to_categorical
|
||
|
import tensorflow as tf
|
||
|
|
||
|
|
||
|
# load doc into memory
|
||
|
def load_doc(filename):
    """Read a whole text file into memory and return it as one string.

    Args:
        filename: Path of the file to read. It is opened in text mode
            ('r') with the platform's default encoding.

    Returns:
        The complete contents of the file as a single ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even when
    # read() raises, so no file descriptor is leaked on error.
    with open(filename, 'r') as file:
        return file.read()
|
||
|
|
||
|
|
||
|
def input_tensor(in_filename):
    """Load a file of character sequences and one-hot encode them.

    Each non-empty line of the file is one sequence. Characters are mapped
    to integer ids using a vocabulary built from this file alone, the last
    character of every sequence is dropped, and the remaining ids are
    one-hot encoded.

    Args:
        in_filename: Path of a text file holding one sequence per line.
            All non-empty lines are expected to have the same length;
            otherwise the sequences cannot be stacked into one array.

    Returns:
        An array with one entry per sequence; each entry is the one-hot
        encoding (``num_classes=vocab_size``) of that sequence without
        its final character.

    Raises:
        ValueError: If the file contains no non-empty line.
    """
    raw_text = load_doc(in_filename)

    # A trailing newline (or a blank line) makes split('\n') yield an empty
    # string. Encoding it would give a zero-length row, which makes the
    # list ragged and breaks the 2-D slicing below, so such lines are
    # skipped.
    lines = [line for line in raw_text.split('\n') if line]
    if not lines:
        raise ValueError('%s contains no sequences' % in_filename)

    # The vocabulary is taken from the whole raw text, so the newline
    # separator (when present) is part of it and counts towards vocab_size.
    chars = sorted(set(raw_text))
    mapping = {c: i for i, c in enumerate(chars)}
    vocab_size = len(mapping)
    print('Vocabulary Size: %d' % vocab_size)

    # integer encode every line, one row per sequence
    encoded = array([[mapping[char] for char in line] for line in lines])

    # keep everything except the last character of each sequence
    X = encoded[:, :-1]

    # one-hot encode row by row and stack the results into a single array
    return array([to_categorical(x, num_classes=vocab_size) for x in X])
|
||
|
|
||
|
|
||
|
# One-hot encode the two text files that are compared below. Y is passed
# to the loss as y_true and X as y_pred.
Y = input_tensor('gen_seq.txt')
X = input_tensor('test_seq.txt')

# Keep at most the first 22 sequences and the first 24 one-hot classes of
# each tensor.
# NOTE(review): input_tensor builds its character -> index mapping per
# file, so class i of Y and class i of X refer to the same character only
# if both files contain the same character set -- confirm.
Y = Y[:22, :, :24]
X = X[:22, :, :24]

# The first two constructor parameters are from_logits and label_smoothing;
# they are spelled out as keywords with plain Python scalars.
# NOTE(review): from_logits=True makes the loss apply a softmax to X, which
# is one-hot data rather than raw logits -- confirm this is intended.
cce = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True,
    label_smoothing=0.001,
)
a = cce(Y, X).numpy()
print("Cross entropy: ", a)
# The loss is computed with the natural logarithm, i.e. it is measured in
# nats, so the matching perplexity is e ** H rather than 2 ** H.
print("Perplexity: ", tf.math.exp(a).numpy())
|