from flask import Flask |
import torch |
from torch import nn |
import re |
import numpy as np |
import pandas as pd |
from collections import OrderedDict |
# Flask application object; the route handler in this file registers itself
# on it through the @app.route decorator.
app = Flask(__name__)
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Comparing a torch.device against a plain string such as 'cuda:0' is not a
# reliable test, so the device *type* decides whether the GPU is selected.
if device.type == 'cuda':
    torch.cuda.set_device(device)
print(device)
def extract_text_from_link(url, timeout=10):
    """Download *url* and return the text content of the page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The string produced by ``BeautifulSoup.get_text()`` for the response
        body parsed with the ``'html.parser'`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.

    NOTE(review): ``requests`` and ``BeautifulSoup`` are not imported in the
    visible part of this file; both must be available at module level for
    this function to run -- confirm and add the imports.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of returning the text of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()
# Sample corpus from which the character vocabulary and the training words
# are derived (see create_object / Text2Words).
doc = """The word "deep" in "deep learning" refers to the number of layers through which the data is transformed. More precisely,
deep learning systems have a substantial credit assignment path (CAP) depth. The CAP is the chain of transformations from input to
output. CAPs describe potentially causal connections between input and output. For a feedforward neural network, the depth of the
CAPs is that of the network and is the number of hidden layers plus one (as the output layer is also parameterized). For recurrent
neural networks, in which a signal may propagate through a layer more than once, the CAP depth is potentially unlimited.[13] No
universally agreed-upon threshold of depth divides shallow learning from deep learning, but most researchers agree that deep
learning involves CAP depth higher than 2. CAP of depth 2 has been shown to be a universal approximator in the sense that it
can emulate any function.[14] Beyond that, more layers do not add to the function approximator ability of the network. Deep
models (CAP > 2) are able to extract better features than shallow models and hence, extra layers help in learning the features
effectively."""
class Text2Words:
    """Character-level training data derived from the words of a document.

    The document is split into runs of ASCII letters. The distinct words are
    right-padded with spaces to a common length and turned into
    next-character pairs: the input is the word without its last character,
    the target is the word without its first character.

    Attributes (all set in ``__init__``):
        text_all: every word found, in document order, duplicates included.
        text: distinct words, sorted, each padded with spaces to ``maxlen``.
        chars_all: the distinct (unpadded) words concatenated together.
        chars: sorted distinct characters, always including ``' '``.
        int2char, char2int: index <-> character lookup tables.
        maxlen: length of the longest word.
        input_seq_char, target_seq_char: per-word input and target strings.
        input_seq_index, target_seq_index: the same, as lists of indices.
        dict_size: number of distinct characters.
        seq_len: ``maxlen - 1``, the length of every input/target sequence.
        batch_size: number of distinct words.
        input_seq: float32 one-hot array of shape
            ``(batch_size, seq_len, dict_size)``.
    """

    def __init__(self, document):
        """Tokenise *document* and build every derived table.

        Raises:
            ValueError: If *document* contains no alphabetic word.
        """
        # Only runs of ASCII letters count as words; digits and punctuation
        # are dropped.
        self.text_all = re.findall(r'\b[A-Za-z]+\b', document)
        if not self.text_all:
            raise ValueError('document contains no alphabetic words')
        # Sorted rather than raw set order so that the row order of the
        # encoded batch is the same on every run.
        self.text = sorted(set(self.text_all))
        self.chars_all = ''.join(self.text)
        self.chars = self.unique_chars(self.chars_all)
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {char: ind for ind, char in self.int2char.items()}
        # Measured before padding, so this is the longest real word.
        self.maxlen = len(max(self.text, key=len))
        self.update_text()
        self.input_seq_char, self.target_seq_char = self.get_seq_char(self.text)
        self.input_seq_index, self.target_seq_index = self.get_seq(
            self.char2int,
            self.input_seq_char,
            self.target_seq_char,
            len(self.text),
        )
        self.dict_size = len(self.char2int)
        self.seq_len = self.maxlen - 1
        self.batch_size = len(self.text)
        self.input_seq = self.one_hot_encode(
            self.input_seq_index, self.dict_size, self.seq_len, self.batch_size
        )

    def one_hot_encode(self, sequence, dict_size, seq_len, batch_size):
        """Return a float32 one-hot array for index sequences.

        Args:
            sequence: Indexable as ``sequence[i][u]`` giving the character
                index of step ``u`` in row ``i``.
            dict_size: Size of the last (one-hot) axis.
            seq_len: Number of steps encoded per row.
            batch_size: Number of rows encoded.

        Returns:
            ``numpy.ndarray`` of shape ``(batch_size, seq_len, dict_size)``.
        """
        features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
        for row in range(batch_size):
            for step in range(seq_len):
                features[row, step, sequence[row][step]] = 1
        return features

    def get_seq(self, char2int, input_seq_char, target_seq_char, n):
        """Map the first *n* input and target strings to lists of indices.

        Returns:
            Tuple ``(x, y)`` of lists of index lists, one entry per word.
        """
        x = [[char2int[ch] for ch in input_seq_char[i]] for i in range(n)]
        y = [[char2int[ch] for ch in target_seq_char[i]] for i in range(n)]
        return x, y

    def get_seq_char(self, text):
        """Split every word into (all but last char, all but first char)."""
        input_seq = [word[:-1] for word in text]
        target_seq = [word[1:] for word in text]
        return input_seq, target_seq

    def unique_chars(self, chars_all):
        """Return the sorted distinct characters of *chars_all* plus ``' '``.

        The space is always included because it is the padding character
        appended to the words by ``update_text``.
        """
        chars = set(chars_all)
        chars.add(' ')
        return sorted(chars)

    def update_text(self):
        """Right-pad every word in ``self.text`` with spaces to ``maxlen``."""
        # Slice assignment keeps the same list object, as the in-place
        # element updates did.
        self.text[:] = [word.ljust(self.maxlen) for word in self.text]

    def description(self):
        """Print the distinct words grouped by their first character."""
        groups = {}
        for word in self.text:
            groups.setdefault(word[0], []).append(word.strip())
        for initial, words in sorted(groups.items()):
            print(f'{initial} : {sorted(words)}')

    def lengt_analysis(self):
        """Print the distinct words grouped by length, with a count each."""
        groups = {}
        for word in set(self.text_all):
            groups.setdefault(len(word), []).append(word.strip())
        for length, words in sorted(groups.items()):
            print(f'{length} : count = {len(words)} list = {sorted(words)}')
        return None
def create_object(doc):
    """Build the Text2Words vocabulary/dataset for the text *doc*."""
    return Text2Words(doc)


# predict() and home() read a module-level name ``obj`` that was never
# assigned in this file; build it once from the sample corpus so those
# lookups resolve.
obj = create_object(doc)
def get_inputs(obj):
    """Return ``obj.input_seq`` and ``obj.target_seq_index`` as tensors.

    Both tensors are created on the module-level ``device``.

    Returns:
        Tuple ``(input_seq, target_seq_index)``.
    """
    features, targets = obj.input_seq, obj.target_seq_index
    return (
        torch.tensor(features, device=device),
        torch.tensor(targets, device=device),
    )
class Model(nn.Module):
    """``nn.RNN`` followed by a linear layer applied to every time step.

    Args:
        input_size: Number of features per input step.
        output_size: Number of scores produced per step.
        hidden_dim: Width of the recurrent hidden state.
        n_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the RNN from a zero hidden state and score every step.

        Args:
            x: Batch-first input whose first dimension is the batch size.

        Returns:
            Tuple ``(out, hidden)``. ``out`` has the batch and time axes
            flattened into one, giving ``(batch * seq, output_size)``;
            ``hidden`` is the final hidden state returned by the RNN.
        """
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        # Flatten (batch, seq, hidden) so the linear layer sees one row per
        # time step.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero initial hidden state for *batch_size* rows.

        The tensor has shape ``(n_layers, batch_size, hidden_dim)`` and is
        allocated with the device and dtype of the model's own weights, so
        it always matches the RNN it is fed to. No random numbers are
        involved, so the global RNG is deliberately left untouched
        (reseeding it on every forward pass would make any later random
        draw repeat).
        """
        shape = (self.n_layers, batch_size, self.hidden_dim)
        return self.fc.weight.new_zeros(shape)
def create_model(obj):
    """Create the network, loss and optimiser sized for *obj*'s vocabulary.

    Input and output widths both equal ``obj.dict_size``; the hidden state
    is twice that wide and a single RNN layer is used. The model is moved to
    the module-level ``device``.

    Returns:
        Tuple ``(model, criterion, optimizer)``: the ``Model``, an
        ``nn.CrossEntropyLoss`` and an Adam optimiser with learning rate
        0.01.
    """
    vocab_size = obj.dict_size
    net = Model(
        input_size=vocab_size,
        output_size=vocab_size,
        hidden_dim=2 * vocab_size,
        n_layers=1,
    )
    net.to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=0.01)
    return net, loss_fn, adam
def predict(model, character):
    """Predict the character that follows the sequence *character*.

    Reads the module-level ``obj`` (vocabulary tables and one-hot encoder)
    and ``device``; both must be defined before this is called.

    Args:
        model: Callable as ``model(x)`` returning ``(out, hidden)``; the
            last row of ``out`` is taken as the scores for the next
            character.
        character: Iterable of single characters (a string or a list).

    Returns:
        Tuple ``(next_char, hidden)``: the highest-scoring character and
        the hidden state returned by the model.

    Raises:
        KeyError: If a character is not a key of ``obj.char2int``.
    """
    indices = np.array([[obj.char2int[c] for c in character]])
    encoded = obj.one_hot_encode(indices, obj.dict_size, indices.shape[1], 1)
    # The tensor is created directly on ``device``; the former extra
    # ``.to(device)`` call discarded its result and has been dropped.
    inputs = torch.tensor(encoded, device=device)
    out, hidden = model(inputs)
    # detach() replaces the legacy ``.data`` access.
    prob = nn.functional.softmax(out[-1], dim=0).detach()
    char_ind = torch.argmax(prob, dim=0).item()
    return obj.int2char[char_ind], hidden
def sample(model, out_len, start='h'):
    """Extend *start* one predicted character at a time.

    Generation stops when the model predicts a space (the padding
    character, which is not included in the result) or when the result has
    reached ``out_len`` characters. The length bound guarantees termination
    even if the model never predicts a space. The seed itself is never
    truncated: if *start* is already longer than ``out_len`` it is returned
    unchanged.

    Args:
        model: Network handed to ``predict``; switched to eval mode here.
        out_len: Maximum number of characters in the returned string.
        start: Non-empty seed string. A seed ending in a space is returned
            without that space and without running the model.

    Returns:
        The generated string.

    Raises:
        IndexError: If *start* is empty.
    """
    model.eval()
    chars = list(start)
    char = chars.pop()
    limit = max(out_len, len(start))
    while char != ' ':
        chars.append(char)
        if len(chars) >= limit:
            break
        char, _ = predict(model, chars)
    return ''.join(chars)
def load_checkpoint(filepath):
    """Restore a model from a checkpoint file and put it in eval mode.

    The checkpoint is expected to be a mapping with the keys ``'model'``
    (the model object) and ``'state_dict'`` (its weights).

    Args:
        filepath: Path of the checkpoint file.

    Returns:
        The model, with weights loaded, moved to the module-level
        ``device`` and switched to evaluation mode.
    """
    # map_location lets a checkpoint written on a GPU machine be restored on
    # a CPU-only host, and vice versa.
    # NOTE(security): torch.load unpickles arbitrary objects; only load
    # checkpoint files from a trusted source.
    # NOTE(review): the checkpoint stores a whole pickled module under
    # 'model'; newer PyTorch releases restrict unpickling by default
    # (weights_only) -- verify this still loads on the deployed version.
    checkpoint = torch.load(filepath, map_location=device)
    model = checkpoint['model']
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()
    return model
# Model restored from 'checkpoint.pth'; filled in on the first request.
_cached_model = None


def _get_model():
    """Load the checkpoint on first use and reuse the model afterwards."""
    global _cached_model
    if _cached_model is None:
        _cached_model = load_checkpoint('checkpoint.pth')
    return _cached_model


@app.route('/')
def home():
    """Generate a word starting with 'ap' and return it under ``'key'``.

    The checkpoint is read from disk only once instead of on every request.
    Generation is capped at ``obj.maxlen`` characters, where ``obj`` is the
    module-level vocabulary object.

    Returns:
        A dict ``{'key': <generated string>}``.
    """
    model = _get_model()
    res = sample(model, obj.maxlen, 'ap')
    return {'key': res}