Загрузить файлы

This commit is contained in:
Stanislav Matsunych 2020-05-17 21:02:31 +00:00
parent 630d3a1d6f
commit 51a037c48b
5 changed files with 20339 additions and 0 deletions

90
Bot.py Normal file
View File

@ -0,0 +1,90 @@
from telegram import Bot
from telegram import Update
from telegram.ext import Updater
from telegram.ext import MessageHandler
from telegram.ext import Filters
from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Generate `n_chars` characters from a character-level language model.

    Args:
        model: trained Keras model exposing `predict_classes`.
        mapping: dict mapping character -> integer index (as saved in mapping.pkl).
        seq_length: fixed input window length the model was trained on.
        seed_text: initial text to condition the generation on.
        n_chars: number of characters to append.

    Returns:
        seed_text with n_chars generated characters appended
        (spaces are replaced with underscores for nickname use).
    """
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the characters as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict character
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
        # BUG FIX: append the mapped character `out_char`, not the leaked loop
        # variable `char` — when no index matched, `char` held the *last* dict
        # key and a wrong character was silently appended.
        if out_char == ' ':
            out_char = '_'
        in_text += out_char
    return in_text
# SECURITY(review): bot token is hard-coded and committed to the repository —
# revoke it via BotFather and load it from an environment variable instead.
TG_TOKEN = "1011115574:AAHLaC4jgtkYGxL9wILnMjmTxsHLIqsGDZE"
# Accumulated generated text shared across ALL chats; grows without bound
# (appended to in message_handler).
BUFF = ''
def message_handler(bot: Bot, update: Update):
    """Reply to any incoming message with a generated nickname continuation.

    Prepends the global BUFF to the user's text, generates `sim` new
    characters with the language model, sends them back, and appends the
    reply to BUFF.
    """
    sim = 5  # number of characters to generate per message
    # NOTE(review): reloading the model and mapping on every message is very
    # slow; consider loading them once at module import or caching them.
    model = load_model('model.h5')
    global BUFF
    # load the character->integer mapping; `with` closes the file handle,
    # which the original left open.
    with open('mapping.pkl', 'rb') as fh:
        mapping = load(fh)
    bot.send_message(chat_id=update.effective_message.chat_id,
                     text="Введи начало никнейма")
    text = update.effective_message.text
    text_in = BUFF + text
    nike = generate_seq(model, mapping, 10, text_in, sim)
    # Last len(text) + sim characters: the user's text plus the generated
    # continuation (replaces the original manual negative-index while-loop).
    nik = nike[-(sim + len(text)):]
    bot.send_message(chat_id=update.effective_message.chat_id,
                     text=nik)
    BUFF += nik
    return
def main():
    """Wire up the Telegram bot and poll for updates until interrupted."""
    bot = Bot(token=TG_TOKEN)
    updater = Updater(bot=bot)
    # Route every incoming message to message_handler.
    all_messages = MessageHandler(Filters.all, message_handler)
    updater.dispatcher.add_handler(all_messages)
    updater.start_polling()
    updater.idle()
if __name__ == '__main__':
    main()

59
Create_Model.py Normal file
View File

@ -0,0 +1,59 @@
from numpy import array
from pickle import dump
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
# load doc into memory
# load doc into memory
def load_doc(filename):
    """Read the whole text file `filename` and return its contents as a str."""
    # Context manager guarantees the handle is closed even if read() raises
    # (the original closed it manually and would leak on error).
    with open(filename, 'r') as file:
        return file.read()
# load the pre-built character sequences (one 11-char window per line)
in_filename = 'char_sequences.txt'
raw_text = load_doc(in_filename)
lines = raw_text.split('\n')
# integer encode sequences of characters: stable char -> index mapping
chars = sorted(set(raw_text))
mapping = {c: i for i, c in enumerate(chars)}
sequences = [[mapping[char] for char in line] for line in lines]
# vocabulary size
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)
# separate into input (first 10 chars) and output (11th char)
sequences = array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]
# one-hot encode inputs and target
X = array([to_categorical(x, num_classes=vocab_size) for x in X])
y = to_categorical(y, num_classes=vocab_size)
# define model: single LSTM layer over one-hot characters, softmax output
model = Sequential()
model.add(LSTM(75, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model
model.fit(X, y, epochs=100, verbose=2)
# save the model to file
model.save('model.h5')
# save the mapping — `with` fixes the leaked file handle from
# `dump(mapping, open(...))` in the original.
with open('mapping.pkl', 'wb') as fh:
    dump(mapping, fh)

40
Create_data.py Normal file
View File

@ -0,0 +1,40 @@
# load doc into memory
def load_doc(filename):
    """Read the whole text file `filename` and return its contents as a str."""
    # Context manager guarantees the handle is closed even if read() raises
    # (the original closed it manually and would leak on error).
    with open(filename, 'r') as file:
        return file.read()
# save tokens to file, one dialog per line
def save_doc(lines, filename):
    """Write each entry of `lines` to `filename`, newline-separated."""
    data = '\n'.join(lines)
    # Context manager closes the file even if write() raises
    # (the original closed it manually and would leak on error).
    with open(filename, 'w') as file:
        file.write(data)
# load text
raw_text = load_doc('rhyme.txt')
print(raw_text)
# clean: collapse all whitespace runs into single spaces
raw_text = ' '.join(raw_text.split())
# organize into overlapping windows of `length` input chars + 1 target char
length = 10
sequences = [raw_text[start - length:start + 1]
             for start in range(length, len(raw_text))]
print('Total Sequences: %d' % len(sequences))
# save sequences to file
out_filename = 'char_sequences.txt'
save_doc(sequences, out_filename)

42
Generate.py Normal file
View File

@ -0,0 +1,42 @@
from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Generate `n_chars` characters from a character-level language model.

    Args:
        model: trained Keras model exposing `predict_classes`.
        mapping: dict mapping character -> integer index (as saved in mapping.pkl).
        seq_length: fixed input window length the model was trained on.
        seed_text: initial text to condition the generation on.
        n_chars: number of characters to append.

    Returns:
        seed_text with n_chars generated characters appended.
    """
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the characters as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict character
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
        # BUG FIX: append the mapped character `out_char`, not the leaked loop
        # variable `char` — when no index matched, `char` held the *last* dict
        # key and a wrong character was silently appended.
        in_text += out_char
    return in_text
# load the model
model = load_model('model.h5')
# load the mapping — `with` fixes the leaked file handle from
# `load(open(...))` in the original.
with open('mapping.pkl', 'rb') as fh:
    mapping = load(fh)
# demo: continue the seed 'Mar' with 7 generated characters
print(generate_seq(model, mapping, 10, 'Mar', 7))

20108
char_sequences.txt Normal file

File diff suppressed because it is too large Load Diff