TusharGoel committed on
Commit
17a9f25
·
1 Parent(s): 7bb3b36

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +61 -0
README.md CHANGED
@@ -1,3 +1,64 @@
1
  ---
2
  license: mit
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
+ language:
4
+ - en
5
+ library_name: transformers
6
+ pipeline_tag: document-question-answering
7
  ---
8
+
9
+ Fine-tuned on the DocVQA dataset (40,000 questions).
10
+
11
+ ```python
12
+ import json
13
+ from glob import glob
14
+ from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering
15
+
16
+ import torch
17
+ import numpy as np
18
+
19
# Checkpoint identifier; the same value is passed to both from_pretrained calls.
model_name = "TusharGoel/LayoutLMv2-finetuned-docvqa"
# Called by pipeline() with (images, question, words, boxes=...) to build the model inputs.
processor = AutoProcessor.from_pretrained(model_name)
# Called by pipeline() on the processor output; its start_logits/end_logits are read there.
model = AutoModelForDocumentQuestionAnswering.from_pretrained(model_name)
22
+
23
+
24
def pipeline(question, words, boxes, **kwargs):
    """Answer ``question`` about one document page and score that answer.

    The inputs are encoded with the module-level ``processor`` and run through
    the module-level ``model``. The highest-probability answer span is then
    mapped back to entries of ``words``; the returned score is the probability
    of exactly that span, so answer and score always describe the same span.

    Args:
        question: Question text, passed to the processor before ``words``.
        words: Words of the page. The answer is a space-joined slice of this
            list.
        boxes: Passed to the processor as ``boxes=`` alongside ``words``.
        **kwargs: ``images`` (required) is forwarded to the processor as its
            first positional argument. ``max_answer_len`` (optional, default
            20) caps the answer span length, counted in tokens.

    Returns:
        A ``(answer, score)`` tuple. ``("", 0.0)`` is returned when no span
        inside ``words`` can be selected or when inference fails.

    Raises:
        KeyError: If ``images`` is not supplied.
    """
    import logging

    images = kwargs["images"]
    max_answer_len = kwargs.get("max_answer_len", 20)

    try:
        encoding = processor(
            images,
            question,
            words,
            boxes=boxes,
            return_token_type_ids=True,
            return_tensors="pt",
            truncation=True,
        )
        word_ids = encoding.word_ids(0)
        sequence_ids = encoding.sequence_ids(0)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            outputs = model(**encoding)

        start_logits = outputs.start_logits[0].detach().cpu().numpy()
        end_logits = outputs.end_logits[0].detach().cpu().numpy()
        attention = encoding["attention_mask"][0].cpu().numpy()

        # A token may start or end an answer only if it is attended to,
        # belongs to the second sequence (``words``; the question is the
        # first), and maps back to a word. Otherwise a question-relative or
        # missing word id would be used to index ``words``.
        answerable = np.array(
            [
                seq_id == 1 and word_id is not None and attended != 0
                for seq_id, word_id, attended in zip(sequence_ids, word_ids, attention)
            ],
            dtype=bool,
        )
        if not answerable.any():
            return "", 0.0

        start_probs = _masked_softmax(start_logits, answerable)
        end_probs = _masked_softmax(end_logits, answerable)

        # span_scores[s, e] = P(start=s) * P(end=e), kept only for
        # s <= e < s + max_answer_len.
        span_scores = np.tril(
            np.triu(np.outer(start_probs, end_probs)), max_answer_len - 1
        )
        start_tok, end_tok = (
            int(i)
            for i in np.unravel_index(np.argmax(span_scores), span_scores.shape)
        )

        # If every candidate is zero, argmax falls back to (0, 0); reject it.
        if not (answerable[start_tok] and answerable[end_tok]):
            return "", 0.0

        answer = " ".join(words[word_ids[start_tok] : word_ids[end_tok] + 1])
        return answer, float(span_scores[start_tok, end_tok])
    except Exception:
        # Keep the best-effort contract (callers get an empty answer instead
        # of a crash), but leave a trace of what went wrong.
        logging.getLogger(__name__).exception("Document question answering failed")
        return "", 0.0


def _masked_softmax(logits, keep):
    """Return softmax of ``logits`` with positions where ``keep`` is False driven to ~0."""
    masked = np.where(keep, logits.astype(np.float64), -10000.0)
    # Subtracting the maximum keeps np.exp from overflowing on large logits.
    exps = np.exp(masked - masked.max())
    return exps / exps.sum()
64
+ ```