MrVicente committed
Commit 50f84cc
1 Parent(s): b1a32e2

Create README.md

Files changed (1):
  1. README.md +69 -0

README.md ADDED
---
language: en
tags:
- generative qa
datasets:
- eli5
- stackexchange (pets, cooking, gardening, diy, crafts)
---

Work by [Frederico Vicente](https://huggingface.co/mrvicente) & [Diogo Tavares](https://huggingface.co/d-c-t). We fine-tuned BART-large for generative question answering. The model was trained on ELI5, askScience, and StackExchange data from the following forums: pets, cooking, gardening, diy, crafts.
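
For a quick smoke test, the model should also work with the standard `transformers` text2text-generation pipeline (a minimal sketch; the explicit generation settings in the Usage section below give finer control):

```python
from transformers import pipeline

# Text2TextGenerationPipeline wraps tokenization, generation, and decoding.
qa = pipeline("text2text-generation", model="unlisboa/bart_qa_assistant")
print(qa("Why do cats purr?", max_length=100, num_beams=4, do_sample=True))
```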

### Usage

```python
from transformers import (
    BartForConditionalGeneration,
    BartTokenizer,
)
import torch
import json

def read_json_file_2_dict(filename, store_dir='.'):
    # Small helper for loading JSON data (e.g. a batch of questions; see below).
    with open(f'{store_dir}/{filename}', 'r', encoding='utf-8') as file:
        return json.load(file)

def get_device():
    # Use the first GPU if one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
        n_gpus = torch.cuda.device_count()
        first_gpu = torch.cuda.get_device_name(0)

        print(f'There are {n_gpus} GPU(s) available.')
        print(f'GPU to be used: {first_gpu}')
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")
    return device

model_name = 'unlisboa/bart_qa_assistant'
tokenizer = BartTokenizer.from_pretrained(model_name)
device = get_device()
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)
model.eval()

question = "How do I keep a garden bed free of weeds?"  # any natural-language question
model_input = tokenizer(question, truncation=True, padding=True, return_tensors="pt")
generated_answers_encoded = model.generate(
    input_ids=model_input["input_ids"].to(device),
    attention_mask=model_input["attention_mask"].to(device),
    force_words_ids=None,
    min_length=1,
    max_length=100,
    do_sample=True,
    bad_words_ids=None,  # optionally pass lists of token ids to block
    early_stopping=True,
    num_beams=4,
    temperature=1.0,
    top_k=None,
    top_p=None,
    # eos_token_id=tokenizer.eos_token_id,
    no_repeat_ngram_size=2,
    num_return_sequences=1,
    return_dict_in_generate=True,
    output_scores=True,
)
response = tokenizer.batch_decode(
    generated_answers_encoded.sequences,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=True,
)
print(response)
```
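
The snippet combines beam search (`num_beams=4`) with sampling (`do_sample=True`), so answers will vary between runs. The `read_json_file_2_dict` helper can also be used to answer a batch of questions at once. A minimal sketch, assuming a hypothetical `questions.json` holding a JSON list of question strings and reusing the `tokenizer`, `model`, and `device` loaded above:

```python
# questions.json is assumed to contain e.g. ["Why do cats purr?", "How do I compost?"]
questions = read_json_file_2_dict('questions.json')

batch = tokenizer(questions, truncation=True, padding=True, return_tensors="pt")
output_ids = model.generate(
    input_ids=batch["input_ids"].to(device),
    attention_mask=batch["attention_mask"].to(device),
    max_length=100,
    num_beams=4,
    early_stopping=True,
)
answers = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
for q, a in zip(questions, answers):
    print(f'Q: {q}\nA: {a}\n')
```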

Have fun!