import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer  # only used by the commented-out fine-tuning experiment below
import gradio as gr
from gradio.mix import Series  # gradio.mix is only available in older Gradio releases (removed in 3.x)
#import torch.nn.functional as F
from aitextgen import aitextgen

#IMPLEMENT FINE TUNING

#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#from datasets import load_dataset, load_metric
#import numpy as np
#dataset = load_dataset("bananabot/engMollywoodSummaries")
#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

#tokenizer.pad_token = tokenizer.eos_token

#def tokenize_function(examples):
#    return tokenizer(examples["text"], padding="max_length", truncation=True)
#tokenized_datasets = dataset.map(tokenize_function, batched=True)

#model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B").to(device)
#training_args = TrainingArguments(output_dir="test_trainer")
#small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
#small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

#metric = load_metric("accuracy")
#def compute_metrics(eval_pred):
#    logits, labels = eval_pred
#    predictions = np.argmax(logits, axis=-1)
#    return metric.compute(predictions=predictions, references=labels)

#trainer = Trainer(
#    model=model,
#    args=training_args,
#    train_dataset=small_train_dataset,
#    eval_dataset=small_eval_dataset,
#    compute_metrics=compute_metrics,
#)

#trainer.train()
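
# NOTE: as written above, tokenize_function produces no "labels" field, so Trainer
# has no loss to optimize for causal language modeling. A minimal fix (a sketch,
# using the standard transformers data collator) is to let a language-modeling
# collator copy input_ids into labels:
#from transformers import DataCollatorForLanguageModeling
#data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
#trainer = Trainer(
#    model=model,
#    args=training_args,
#    train_dataset=small_train_dataset,
#    eval_dataset=small_eval_dataset,
#    data_collator=data_collator,
#)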




# Load the 125M-parameter GPT-Neo checkpoint through aitextgen for lightweight inference
ai = aitextgen(model="EleutherAI/gpt-neo-125M")
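# Sketch, assuming aitextgen's to_gpu flag (verify against your installed version):
# move the model to GPU when one is available.
#ai = aitextgen(model="EleutherAI/gpt-neo-125M", to_gpu=torch.cuda.is_available())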

#model_name = "EleutherAI/gpt-neo-125M"
#tokenizer = AutoTokenizer.from_pretrained(model_name)


#max_length = 123
#input_txt = "This malayalam movie is about"

#input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
#output = model.generate(input_ids, max_length=max_length, num_beams=5, do_sample=True, no_repeat_ngram_size=2, temperature=1.37, top_k=69, top_p=0.96)
#print(tokenizer.decode(output[0]))

#def generate(input_txt):
#    # tokenize inside the function so the argument is actually used
#    input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
#    output = model.generate(input_ids, max_length=max_length, num_beams=5, do_sample=True, no_repeat_ngram_size=2, temperature=1.37, top_k=69, top_p=0.96)
#    return tokenizer.decode(output[0])

def ai_text(inp):
    # Generate one continuation of the user's prompt; beams and no_repeat_ngram_size
    # keep the text coherent, while temperature/top_k/top_p sampling keeps it creative.
    generated_text = ai.generate_one(max_length=234, prompt=inp, no_repeat_ngram_size=3, num_beams=7, do_sample=True, temperature=1.37, top_k=69, top_p=0.96)
    return generated_text
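# Quick smoke test before wiring up the UI (illustrative prompt, uncomment to run):
#print(ai_text("This malayalam movie is about"))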
#inputs = gr.inputs.Textbox(lines=7, placeholder="Enter the beginning of your mollywood movie idea and the നിർമ്മിത ബുദ്ധി will fill in the rest...")  # "നിർമ്മിത ബുദ്ധി" is Malayalam for "artificial intelligence"

#IMPLEMENT KURIAN BENOY

#generator = output  # BUG: Series chains gr.Interface objects, not a raw generate() result
#translator = gr.Interface.load("models/Helsinki-NLP/opus-mt-en-ml")
#Series(generator, translator, inputs=gr.inputs.Textbox(lines=7, label="Input Text")).launch()  # generate text, then translate it to Malayalam, and output the result

#interface = gr.Interface(fn=generate,
#                         inputs=inputs,
#                         outputs='text',
#                         title='AI based Mollywood movie idea generator')

#interface.launch()
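
# A sketch of the generate-then-translate chain attempted above, assuming the old
# gradio.mix API; the interface names here are illustrative, not from the original:
#generator_iface = gr.Interface(fn=ai_text, inputs="textbox", outputs="text")
#translator_iface = gr.Interface.load("models/Helsinki-NLP/opus-mt-en-ml")
#Series(generator_iface, translator_iface, inputs=gr.inputs.Textbox(lines=7, label="Input Text")).launch()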

# Launch the Gradio app: free-text prompt in, generated movie idea out
output_text = gr.outputs.Textbox()
gr.Interface(ai_text, "textbox", output_text, title="AI based Mollywood movie idea generator",
             description="Enter the beginning of your malayalam movie idea and the നിർമ്മിത ബുദ്ധി will fill in the rest...").launch()