AhmedSSabir committed · verified
Commit ab08f57 · 1 Parent(s): 71d1633

Update app.py

Files changed (1): app.py +21 -75
app.py CHANGED
@@ -17,21 +17,13 @@ import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from torch.nn.functional import softmax
import numpy as np
-
+ from huggingface_hub import login


#url = "https://github.com/simonepri/lm-scorer/tree/master/lm_scorer/models"
#resp = requests.get(url)

from sentence_transformers import SentenceTransformer, util
- #from sentence_transformers import SentenceTransformer, util
- #from sklearn.metrics.pairwise import cosine_similarity
- #from lm_scorer.models.auto import AutoLMScorer as LMScorer
- #from sentence_transformers import SentenceTransformer, util
- #from sklearn.metrics.pairwise import cosine_similarity
-
- #device = "cuda:0" if torch.cuda.is_available() else "cpu"
- #model_sts = gr.Interface.load('huggingface/sentence-transformers/stsb-distilbert-base')

#model_sts = SentenceTransformer('stsb-distilbert-base')
model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
@@ -43,79 +35,33 @@ from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
import re

- # def Sort_Tuple(tup):
-
- #     # (Sorts in descending order)
- #     tup.sort(key = lambda x: x[1])
- #     return tup[::-1]
-

- # def softmax(x):
- #     exps = np.exp(x)
- #     return np.divide(exps, np.sum(exps))
-

def get_sim(x):
    x = str(x)[1:-1]
    x = str(x)[1:-1]
    return x

- # Load pre-trained model
-
- # model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
-
- # #model = gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)
-
- # #model.eval()
- # #tokenizer = gr.Interface.load('huggingface/distilgpt2')
-
- # tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
- # #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-
-
- # def cloze_prob(text):
-
- #     whole_text_encoding = tokenizer.encode(text)
- #     # Parse out the stem of the whole sentence (i.e., the part leading up to but not including the critical word)
- #     text_list = text.split()
- #     stem = ' '.join(text_list[:-1])
- #     stem_encoding = tokenizer.encode(stem)
- #     # cw_encoding is just the difference between whole_text_encoding and stem_encoding
- #     # note: this might not correspond exactly to the word itself
- #     cw_encoding = whole_text_encoding[len(stem_encoding):]
- #     # Run the entire sentence through the model. Then go "back in time" to look at what the model predicted for each token, starting at the stem.
- #     # Put the whole text encoding into a tensor, and get the model's comprehensive output
- #     tokens_tensor = torch.tensor([whole_text_encoding])
-
- #     with torch.no_grad():
- #         outputs = model(tokens_tensor)
- #         predictions = outputs[0]
-
- #     logprobs = []
- #     # start at the stem and get downstream probabilities incrementally from the model (see above)
- #     start = -1 - len(cw_encoding)
- #     for j in range(start, -1, 1):
- #         raw_output = []
- #         for i in predictions[-1][j]:
- #             raw_output.append(i.item())
-
- #         logprobs.append(np.log(softmax(raw_output)))
-
- #     # if the critical word is three tokens long, the raw_probabilities should look something like this:
- #     # [ [0.412, 0.001, ... ], [0.213, 0.004, ...], [0.002, 0.001, 0.93, ...] ]
- #     # Then for the i'th token we want to find its associated probability
- #     # this is just: raw_probabilities[i][token_index]
- #     conditional_probs = []
- #     for cw, prob in zip(cw_encoding, logprobs):
- #         conditional_probs.append(prob[cw])
- #     # now that you have all the relevant probabilities, return their product.
- #     # This is the probability of the critical word given the context before it.
-
- #     return np.exp(np.sum(conditional_probs))
-
-
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
- model = GPT2LMHeadModel.from_pretrained('gpt2')
+
+
+ print(os.getenv('HF_token'))
+ hf_api_token = os.getenv("HF_token")  # For sensitive secrets
+ #app_mode = os.getenv("APP_MODE")  # For public variables
+
+
+ access_token = hf_api_token
+ #print(login(token = access_token))
+
+
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
+
+
+
+
+
+ #tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+ #model = GPT2LMHeadModel.from_pretrained('gpt2')

def sentence_prob_mean(text):
    # Tokenize the input text and add special tokens
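A note for anyone reproducing this change: the new hunk calls os.getenv, AutoTokenizer, and AutoModelForCausalLM, but the only import it adds is huggingface_hub.login. A minimal sketch of the imports and gated-model login the new code appears to assume follows; the "HF_token" secret name comes from the diff, while the login guard is an illustrative addition (and it avoids printing the raw token, which would land in the Space logs):

import os

from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the token from the Space secret named in the diff.
hf_api_token = os.getenv("HF_token")

# meta-llama/Llama-3.2-1B is a gated repository, so authenticate before loading.
if hf_api_token:
    login(token=hf_api_token)

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")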
 
17
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
18
  from torch.nn.functional import softmax
19
  import numpy as np
20
+ from huggingface_hub import login
21
 
22
 
23
  #url = "https://github.com/simonepri/lm-scorer/tree/master/lm_scorer/models"
24
  #resp = requests.get(url)
25
 
26
  from sentence_transformers import SentenceTransformer, util
 
 
 
 
 
 
 
 
27
 
28
  #model_sts = SentenceTransformer('stsb-distilbert-base')
29
  model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
 
35
  import numpy as np
36
  import re
37
 
 
 
 
 
 
 
38
 
 
 
 
 
39
 
40
  def get_sim(x):
41
  x = str(x)[1:-1]
42
  x = str(x)[1:-1]
43
  return x
44
 
45
+
46
+
47
+ print(os.getenv('HF_token'))
48
+ hf_api_token = os.getenv("HF_token") # For sensitive secrets
49
+ #app_mode = os.getenv("APP_MODE") # For public variables
50
+
51
+
52
+ access_token = hf_api_token
53
+ #print(login(token = access_token))
54
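For context on the unchanged lines around the hunk: model_sts loads a sentence-similarity model, and get_sim strips nested brackets from a stringified score. A hedged usage sketch with the library's util helper (the example sentences are illustrative, not from the app):

from sentence_transformers import SentenceTransformer, util

model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')

# Encode two sentences and score their semantic similarity.
emb_a = model_sts.encode("a man is riding a horse", convert_to_tensor=True)
emb_b = model_sts.encode("a person rides an animal", convert_to_tensor=True)
score = util.cos_sim(emb_a, emb_b)  # 2-D tensor, e.g. tensor([[0.77]])

# get_sim's double str(x)[1:-1] appears intended to peel the "[[...]]"
# wrapper off a nested score like [[0.77]] before display.
print(str(score.tolist()))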
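The page truncates app.py inside sentence_prob_mean. Purely as a hypothetical sketch, not the author's code, a mean token-probability scorer of this shape typically looks like the following (it assumes the tokenizer, model, torch, and softmax already in scope above):

def sentence_prob_mean(text):
    # Tokenize the input text and add special tokens.
    input_ids = tokenizer.encode(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(input_ids).logits  # shape: (1, seq_len, vocab_size)
    # Probability the model assigned to each actual next token.
    probs = softmax(logits[0, :-1, :], dim=-1)
    next_ids = input_ids[0, 1:]
    token_probs = probs[torch.arange(next_ids.size(0)), next_ids]
    # Average per-token probability over the whole sentence.
    return token_probs.mean().item()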