Upload files ''
This commit is contained in:
parent
630d3a1d6f
commit
51a037c48b
90 Bot.py Normal file
@@ -0,0 +1,90 @@
from telegram import Bot
from telegram import Update
from telegram.ext import Updater
from telegram.ext import MessageHandler
from telegram.ext import Filters

from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences


# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the characters as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict character
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
        # replace spaces with underscores, then append to input
        if out_char == ' ':
            out_char = '_'
        in_text += out_char
    return in_text


TG_TOKEN = "1011115574:AAHLaC4jgtkYGxL9wILnMjmTxsHLIqsGDZE"

BUFF = ''


def message_handler(bot: Bot, update: Update):
    sim = 5  # number of characters to generate per message
    model = load_model('model.h5')
    global BUFF

    # load the mapping
    mapping = load(open('mapping.pkl', 'rb'))
    user = update.effective_user

    bot.send_message(chat_id=update.effective_message.chat_id,
                     text="Enter the beginning of a nickname")

    text = update.effective_message.text
    text_in = BUFF + text
    nike = generate_seq(model, mapping, 10, text_in, sim)
    nik = ''
    # keep only the last (sim + len(text)) characters: the user's text
    # plus the freshly generated ones, without the BUFF prefix
    iterator = (sim + len(text)) * -1
    while iterator != 0:
        nik += nike[iterator]
        iterator += 1

    reply_text = f'{nik}'
    bot.send_message(chat_id=update.effective_message.chat_id,
                     text=reply_text)

    BUFF += nik
    return


def main():
    bot = Bot(
        token=TG_TOKEN,
    )

    updater = Updater(
        bot=bot,
    )

    handler = MessageHandler(Filters.all, message_handler)
    updater.dispatcher.add_handler(handler)
    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()
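As an aside, the reverse lookup in generate_seq scans every mapping.items() entry for each generated character. A minimal sketch of precomputing the inverse dictionary once instead; the mapping below is a toy stand-in, not the one from mapping.pkl:

# Sketch: build the integer -> character inverse of the mapping once,
# instead of scanning mapping.items() per generated character.
# Toy mapping for illustration; the real one is loaded from mapping.pkl.
mapping = {'a': 0, 'b': 1, 'c': 2}
rev_mapping = {index: char for char, index in mapping.items()}

yhat = 2  # stand-in for a predicted class index
print(rev_mapping[yhat])  # -> 'c'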
59 Create_Model.py Normal file
@@ -0,0 +1,59 @@
from numpy import array
from pickle import dump
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM


# load doc into memory
def load_doc(filename):
    # open the file as read only
    file = open(filename, 'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text


# load
in_filename = 'char_sequences.txt'
raw_text = load_doc(in_filename)
lines = raw_text.split('\n')

# integer encode sequences of characters
chars = sorted(list(set(raw_text)))
mapping = dict((c, i) for i, c in enumerate(chars))
sequences = list()
for line in lines:
    # integer encode line
    encoded_seq = [mapping[char] for char in line]
    # store
    sequences.append(encoded_seq)

# vocabulary size
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)

# separate into input and output
sequences = array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]
sequences = [to_categorical(x, num_classes=vocab_size) for x in X]
X = array(sequences)
y = to_categorical(y, num_classes=vocab_size)

# define model
model = Sequential()
model.add(LSTM(75, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model
model.fit(X, y, epochs=100, verbose=2)

# save the model to file
model.save('model.h5')
# save the mapping
dump(mapping, open('mapping.pkl', 'wb'))
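For reference, the shapes this preprocessing produces can be checked on a toy corpus; a minimal sketch where the single 11-character line is made up (the real data comes from char_sequences.txt):

# Sketch: encoding pipeline on one toy 11-char sequence (10 in, 1 out).
from numpy import array
from keras.utils import to_categorical

lines = ['hello_world']
chars = sorted(set(''.join(lines)))
mapping = dict((c, i) for i, c in enumerate(chars))
vocab_size = len(mapping)

seq = [mapping[c] for c in lines[0]]
X, y = array([seq[:-1]]), array([seq[-1]])
X = array([to_categorical(x, num_classes=vocab_size) for x in X])
y = to_categorical(y, num_classes=vocab_size)
print(X.shape, y.shape)  # (1, 10, 8) (1, 8) -- 8 distinct characters here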
40 Create_data.py Normal file
@@ -0,0 +1,40 @@
# load doc into memory
def load_doc(filename):
    # open the file as read only
    file = open(filename, 'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text


# save tokens to file, one dialog per line
def save_doc(lines, filename):
    data = '\n'.join(lines)
    file = open(filename, 'w')
    file.write(data)
    file.close()


# load text
raw_text = load_doc('rhyme.txt')
print(raw_text)

# clean
tokens = raw_text.split()
raw_text = ' '.join(tokens)

# organize into sequences of characters
length = 10
sequences = list()
for i in range(length, len(raw_text)):
    # select a sequence of characters: 10 input chars plus 1 target
    seq = raw_text[i - length:i + 1]
    # store
    sequences.append(seq)
print('Total Sequences: %d' % len(sequences))

# save sequences to file
out_filename = 'char_sequences.txt'
save_doc(sequences, out_filename)
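To illustrate the sliding window above, a minimal sketch on a made-up string (the real input is rhyme.txt):

# Sketch: each window is 11 characters -- 10 input chars plus 1 target.
raw_text = 'the quick brown fox'
length = 10
sequences = [raw_text[i - length:i + 1] for i in range(length, len(raw_text))]
print(sequences[0])    # 'the quick b'
print(len(sequences))  # 9 windows for a 19-character string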
42 Generate.py Normal file
@@ -0,0 +1,42 @@
from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences


# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the characters as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict character
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
        # append to input
        in_text += out_char
    return in_text


# load the model
model = load_model('model.h5')

# load the mapping
mapping = load(open('mapping.pkl', 'rb'))

print(generate_seq(model, mapping, 10, 'Mar', 7))
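One caveat: Sequential.predict_classes was removed in newer Keras releases. If this code is run against such a version (an assumption about the environment; the commit clearly targets an older Keras), the equivalent call would be:

# Equivalent of model.predict_classes(encoded, verbose=0) on Keras
# versions where that method no longer exists (assumed environment).
from numpy import argmax
yhat = argmax(model.predict(encoded, verbose=0), axis=-1)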
20108 char_sequences.txt Normal file
File diff suppressed because it is too large