BC_Matsunych_2020_Final/Perplexity.py

from numpy import array
from keras.utils import to_categorical
import tensorflow as tf


# load doc into memory
def load_doc(filename):
    """Return the entire contents of the text file *filename* as one string.

    The file is opened read-only in text mode with the default encoding.

    Args:
        filename: Path of the file to read.

    Returns:
        The full text of the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The with-block closes the handle even when read() raises.
    with open(filename, 'r') as handle:
        return handle.read()


def input_tensor(in_filename):
    """Load a text file and one-hot encode each of its lines per character.

    The vocabulary is the sorted set of characters in the complete file
    content, so the newline separator is itself a vocabulary entry whenever
    the file contains one. The vocabulary is rebuilt on every call, which
    means the index assigned to a character depends on the file it was read
    from.

    The last character of every line is dropped before encoding.

    Args:
        in_filename: Path of the text file. All lines must have the same
            length.

    Returns:
        Array of one-hot vectors indexed as [line, position, character index].

    Raises:
        ValueError: If the lines do not all have the same length.
    """
    raw_text = load_doc(in_filename)
    # A terminating newline would otherwise produce a final empty "line" and
    # make the rows ragged. The vocabulary below still uses the full text.
    lines = raw_text.rstrip('\n').split('\n')

    # integer code for every distinct character, in sorted order
    chars = sorted(set(raw_text))
    mapping = {c: i for i, c in enumerate(chars)}

    vocab_size = len(mapping)
    print('Vocabulary Size: %d' % vocab_size)

    # A rectangular array is required for the 2-D slice below.
    if len({len(line) for line in lines}) > 1:
        raise ValueError(
            'all lines in %r must have the same length' % (in_filename,))

    # integer encode every line
    sequences = array([[mapping[char] for char in line] for line in lines])

    # keep everything except the last character of each line
    X = sequences[:, :-1]

    # one-hot encode each remaining position
    return array([to_categorical(x, num_classes=vocab_size) for x in X])


# Encode both files; each call builds its own vocabulary from its own file.
Y = input_tensor('gen_seq.txt')
X = input_tensor('test_seq.txt')

# Keep the first 22 sequences and the first 24 character indices of each
# tensor -- presumably so both have the same shape; TODO confirm the limits
# against the data files.
Y = Y[:22, :, :24]
X = X[:22, :, :24]
# NOTE(review): the first two constructor parameters are from_logits and
# label_smoothing. The original passed tf.constant([1.]) and
# tf.constant([0.001]) positionally, i.e. a truthy from_logits and a label
# smoothing of 0.001; the keywords below keep those values but make them
# explicit. Confirm that treating X as logits is intended.
cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
                                              label_smoothing=0.001)
a = cce(Y, X).numpy()
print("Cross entropy: ", a)
# The loss is computed with the natural logarithm (nats), so perplexity is
# e ** loss; 2 ** loss would only be right for a loss measured in bits.
print("Perplexity: ", tf.exp(a).numpy())
Загрузить файлы '' 2020-08-28 10:39:32 +00:00			`from numpy import array`
			`from keras.utils import to_categorical`
			`import tensorflow as tf`


			`# load doc into memory`
			`def load_doc(filename):`
			`# open the file as read only`
			`file = open(filename, 'r')`
			`# read all text`
			`text = file.read()`
			`# close the file`
			`file.close()`
			`return text`


			`def input_tensor(in_filename):`
			`raw_text = load_doc(in_filename)`
			`lines = raw_text.split('\n')`

			`# integer encode sequences of characters`
			`chars = sorted(list(set(raw_text)))`
			`mapping = dict((c, i) for i, c in enumerate(chars))`
			`sequences = list()`

			`for line in lines:`
			`# integer encode line`
			`encoded_seq = [mapping[char] for char in line]`
			`# store`
			`sequences.append(encoded_seq)`

			`vocab_size = len(mapping)`
			`print('Vocabulary Size: %d' % vocab_size)`

			`# separate into input and output`
			`sequences = array(sequences)`

			`X = sequences[:, :-1]`

			`sequences = [to_categorical(x, num_classes=vocab_size) for x in X]`

			`X = array(sequences)`

			`return X`


			`Y = input_tensor('gen_seq.txt')`
			`X = input_tensor('test_seq.txt')`

			`Y = Y[:22, :, :24]`
			`X = X[:22, :, :24]`
			`cce = tf.keras.losses.CategoricalCrossentropy(tf.constant([1.]), tf.constant([0.001]))`
			`a = cce(Y, X).numpy()`
			`print("Cross entropy: ", a)`
			`print("Perplexity: ", 2 ** a)`