Загрузить файлы ''
This commit is contained in:
		
							parent
							
								
									630d3a1d6f
								
							
						
					
					
						commit
						51a037c48b
					
				
							
								
								
									
										90
									
								
								Bot.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								Bot.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,90 @@
 | 
			
		||||
from telegram import Bot
 | 
			
		||||
from telegram import Update
 | 
			
		||||
from telegram.ext import Updater
 | 
			
		||||
from telegram.ext import MessageHandler
 | 
			
		||||
from telegram.ext import Filters
 | 
			
		||||
 | 
			
		||||
from pickle import load
 | 
			
		||||
from keras.models import load_model
 | 
			
		||||
from keras.utils import to_categorical
 | 
			
		||||
from keras.preprocessing.sequence import pad_sequences
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Generate n_chars characters from a char-level language model.

    model: trained Keras model exposing predict_classes().
    mapping: dict of char -> integer index used at training time.
    seq_length: context window length the model was trained with.
    seed_text: starting string; every char must be a key of mapping.
    n_chars: number of characters to append.
    Returns seed_text plus the generated characters; a generated space
    is emitted as '_' so it survives later tokenisation.
    """
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the current context as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to the fixed window length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict the next character's index
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
        # BUG FIX: the original appended the dict-iteration variable `char`,
        # which only equals the prediction when a match was found; when no
        # index matched yhat it appended an arbitrary last-iterated key.
        # Use the resolved out_char instead.
        if out_char == ' ':
            out_char = '_'
        in_text += out_char
    return in_text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# SECURITY NOTE(review): a live bot token is hard-coded in source; it should
# be revoked and loaded from an environment variable or config file instead.
TG_TOKEN = "1011115574:AAHLaC4jgtkYGxL9wILnMjmTxsHLIqsGDZE"

# Accumulated generated text; message_handler appends each generated
# nickname fragment here so the next message continues from it.
BUFF = ''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def message_handler(bot: Bot, update: Update):
    """Reply to any incoming message with a generated nickname fragment.

    bot/update are supplied by python-telegram-bot's dispatcher.
    Reads the incoming message text, prefixes the accumulated module-level
    BUFF, asks the char-level model for `sim` new characters, sends the
    generated tail back to the chat, and appends it to BUFF.
    """
    sim = 5  # number of characters to generate per message
    global BUFF

    # Cache the model and mapping on the function object so they are loaded
    # from disk once, not on every incoming message (the original reloaded
    # both per message and never closed the pickle file handle).
    if not hasattr(message_handler, '_cache'):
        model = load_model('model.h5')
        with open('mapping.pkl', 'rb') as f:
            mapping = load(f)
        message_handler._cache = (model, mapping)
    model, mapping = message_handler._cache

    bot.send_message(chat_id=update.effective_message.chat_id,
                     text="Введи начало никнейма")

    text = update.effective_message.text
    text_in = BUFF + text
    nike = generate_seq(model, mapping, 10, text_in, sim)

    # The generated string ends with the user's text plus `sim` new chars;
    # take that tail (equivalent to the original's negative-index while loop).
    nik = nike[-(sim + len(text)):]

    bot.send_message(chat_id=update.effective_message.chat_id,
                     text=nik)

    BUFF += nik
    return
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main():
    """Wire the Telegram bot together and poll for updates until stopped."""
    bot = Bot(token=TG_TOKEN)
    updater = Updater(bot=bot)

    # Route every incoming update through message_handler.
    handler = MessageHandler(Filters.all, message_handler)
    updater.dispatcher.add_handler(handler)

    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()
 | 
			
		||||
							
								
								
									
										59
									
								
								Create_Model.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								Create_Model.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,59 @@
 | 
			
		||||
from numpy import array
 | 
			
		||||
from pickle import dump
 | 
			
		||||
from keras.utils import to_categorical
 | 
			
		||||
from keras.models import Sequential
 | 
			
		||||
from keras.layers import Dense
 | 
			
		||||
from keras.layers import LSTM
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# load doc into memory
 | 
			
		||||
def load_doc(filename):
    """Return the entire contents of *filename* as a single string."""
    # with-statement guarantees the handle is closed even if read() raises
    # (the original leaked the handle on any exception between open and close)
    with open(filename, 'r') as file:
        return file.read()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# load the prepared character sequences (one per line)
in_filename = 'char_sequences.txt'
raw_text = load_doc(in_filename)
lines = raw_text.split('\n')

# integer encode sequences of characters: stable char -> index mapping
chars = sorted(list(set(raw_text)))
mapping = dict((c, i) for i, c in enumerate(chars))
sequences = list()
for line in lines:
    # integer encode line
    encoded_seq = [mapping[char] for char in line]
    # store
    sequences.append(encoded_seq)

# vocabulary size
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)

# separate into input (all but last char) and output (the last char)
sequences = array(sequences)
X, y = sequences[:, :-1], sequences[:, -1]
sequences = [to_categorical(x, num_classes=vocab_size) for x in X]
X = array(sequences)
y = to_categorical(y, num_classes=vocab_size)

# define model: one LSTM layer followed by a softmax over the vocabulary
model = Sequential()
model.add(LSTM(75, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model
model.fit(X, y, epochs=100, verbose=2)

# save the model to file
model.save('model.h5')
# save the mapping; with-statement closes the pickle file handle
# (the original passed a bare open() and leaked the handle)
with open('mapping.pkl', 'wb') as f:
    dump(mapping, f)
 | 
			
		||||
							
								
								
									
										40
									
								
								Create_data.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								Create_data.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,40 @@
 | 
			
		||||
# load doc into memory
 | 
			
		||||
def load_doc(filename):
    """Return the entire contents of *filename* as a single string."""
    # with-statement guarantees the handle is closed even if read() raises
    # (the original leaked the handle on any exception between open and close)
    with open(filename, 'r') as file:
        return file.read()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# save tokens to file, one dialog per line
 | 
			
		||||
def save_doc(lines, filename):
    """Write *lines* to *filename*, one entry per line (no trailing newline)."""
    data = '\n'.join(lines)
    # with-statement flushes and closes the handle even if write() raises
    with open(filename, 'w') as file:
        file.write(data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# load the raw source text
raw_text = load_doc('rhyme.txt')
print(raw_text)

# clean: collapse all whitespace runs into single spaces
tokens = raw_text.split()
raw_text = ' '.join(tokens)

# organize into overlapping sequences of (length + 1) characters:
# the first `length` chars are the model input, the last is the target
length = 10
sequences = [raw_text[i - length:i + 1] for i in range(length, len(raw_text))]
print('Total Sequences: %d' % len(sequences))

# save sequences to file, one per line
out_filename = 'char_sequences.txt'
save_doc(sequences, out_filename)
 | 
			
		||||
							
								
								
									
										42
									
								
								Generate.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								Generate.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,42 @@
 | 
			
		||||
from pickle import load
 | 
			
		||||
from keras.models import load_model
 | 
			
		||||
from keras.utils import to_categorical
 | 
			
		||||
from keras.preprocessing.sequence import pad_sequences
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# generate a sequence of characters with a language model
 | 
			
		||||
# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Generate n_chars characters from a char-level language model.

    model: trained Keras model exposing predict_classes().
    mapping: dict of char -> integer index used at training time.
    seq_length: context window length the model was trained with.
    seed_text: starting string; every char must be a key of mapping.
    n_chars: number of characters to append.
    Returns seed_text plus the generated characters.
    """
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the current context as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to the fixed window length
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict the next character's index
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                break
        # BUG FIX: the original appended the dict-iteration variable `char`,
        # which only equals the prediction when a match was found; when no
        # index matched yhat it appended an arbitrary last-iterated key.
        in_text += out_char
    return in_text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# load the trained model produced by Create_Model.py
model = load_model('model.h5')

# load the char -> integer mapping pickled at training time
# NOTE(review): the pickle file handle is never closed here; CPython frees it
# via refcounting but an explicit with-statement would be safer
mapping = load(open('mapping.pkl', 'rb'))

# demo: extend the seed 'Mar' by 7 characters using a 10-char context window
print(generate_seq(model, mapping, 10, 'Mar', 7))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										20108
									
								
								char_sequences.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20108
									
								
								char_sequences.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	Block a user