AhmedSSabir
committed on
Update app.py
app.py
CHANGED
@@ -17,21 +17,13 @@ import torch
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
 from torch.nn.functional import softmax
 import numpy as np
-
+from huggingface_hub import login
 
 
 #url = "https://github.com/simonepri/lm-scorer/tree/master/lm_scorer/models"
 #resp = requests.get(url)
 
 from sentence_transformers import SentenceTransformer, util
-#from sentence_transformers import SentenceTransformer, util
-#from sklearn.metrics.pairwise import cosine_similarity
-#from lm_scorer.models.auto import AutoLMScorer as LMScorer
-#from sentence_transformers import SentenceTransformer, util
-#from sklearn.metrics.pairwise import cosine_similarity
-
-#device = "cuda:0" if torch.cuda.is_available() else "cpu"
-#model_sts = gr.Interface.load('huggingface/sentence-transformers/stsb-distilbert-base')
 
 #model_sts = SentenceTransformer('stsb-distilbert-base')
 model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
@@ -43,79 +35,33 @@ from transformers import GPT2Tokenizer, GPT2LMHeadModel
 import numpy as np
 import re
 
-# def Sort_Tuple(tup):
-
-#     # (Sorts in descending order)
-#     tup.sort(key = lambda x: x[1])
-#     return tup[::-1]
-
 
-# def softmax(x):
-#     exps = np.exp(x)
-#     return np.divide(exps, np.sum(exps))
-
 
 def get_sim(x):
     x = str(x)[1:-1]
     x = str(x)[1:-1]
     return x
 
-
-
-
-
-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-#
-# # cw_encoding is just the difference between whole_text_encoding and stem_encoding
-# # note: this might not correspond exactly to the word itself
-# cw_encoding = whole_text_encoding[len(stem_encoding):]
-# # Run the entire sentence through the model. Then go "back in time" to look at what the model predicted for each token, starting at the stem.
-# # Put the whole text encoding into a tensor, and get the model's comprehensive output
-# tokens_tensor = torch.tensor([whole_text_encoding])
-
-# with torch.no_grad():
-#     outputs = model(tokens_tensor)
-#     predictions = outputs[0]
-
-# logprobs = []
-# # start at the stem and get downstream probabilities incrementally from the model(see above)
-# start = -1-len(cw_encoding)
-# for j in range(start,-1,1):
-#     raw_output = []
-#     for i in predictions[-1][j]:
-#         raw_output.append(i.item())
-
-#     logprobs.append(np.log(softmax(raw_output)))
-
-# # if the critical word is three tokens long, the raw_probabilities should look something like this:
-# # [ [0.412, 0.001, ... ] ,[0.213, 0.004, ...], [0.002,0.001, 0.93 ...]]
-# # Then for the i'th token we want to find its associated probability
-# # this is just: raw_probabilities[i][token_index]
-# conditional_probs = []
-# for cw,prob in zip(cw_encoding,logprobs):
-#     conditional_probs.append(prob[cw])
-# # now that you have all the relevant probabilities, return their product.
-# # This is the probability of the critical word given the context before it.
-
-# return np.exp(np.sum(conditional_probs))
-
-
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-model = GPT2LMHeadModel.from_pretrained('gpt2')
+
+
+print(os.getenv('HF_token'))
+hf_api_token = os.getenv("HF_token") # For sensitive secrets
+#app_mode = os.getenv("APP_MODE") # For public variables
+
+
+access_token = hf_api_token
+#print(login(token = access_token))
+
+
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
+
+
+
+
+
+#tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+#model = GPT2LMHeadModel.from_pretrained('gpt2')
 
 def sentence_prob_mean(text):
     # Tokenize the input text and add special tokens
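For context on the similarity model this commit keeps: roberta-large-nli-stsb-mean-tokens is a sentence-embedding model, and the surviving helper get_sim appears to strip two layers of brackets from a stringified score (e.g. "[[0.83]]" becomes "0.83"). A hypothetical usage sketch; the sentence pair is illustrative only:

from sentence_transformers import SentenceTransformer, util

model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')

# encode two sentences and compare them with cosine similarity
emb_a = model_sts.encode("a man is playing a guitar", convert_to_tensor=True)
emb_b = model_sts.encode("someone strums an instrument", convert_to_tensor=True)

score = util.cos_sim(emb_a, emb_b).item()  # 1x1 tensor -> float
print(round(score, 4))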
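The large commented-out block deleted above computed P(critical word | stem) under GPT-2 as the product of per-token conditional probabilities. A cleaned-up, runnable reading of that block; the wrapper function and the positive-index arithmetic are reconstructed from the deleted comments, so treat critical_word_prob as an illustrative name:

import torch
from torch.nn.functional import log_softmax
from transformers import GPT2Tokenizer, GPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

def critical_word_prob(stem, whole_text):
    stem_encoding = tokenizer.encode(stem)
    whole_text_encoding = tokenizer.encode(whole_text)
    # cw_encoding is the difference between whole_text_encoding and
    # stem_encoding; it may not align exactly with one surface word
    cw_encoding = whole_text_encoding[len(stem_encoding):]

    tokens_tensor = torch.tensor([whole_text_encoding])
    with torch.no_grad():
        predictions = model(tokens_tensor).logits  # (1, seq_len, vocab)

    # logits at position i predict token i + 1, so the first critical-word
    # token is predicted at position len(stem_encoding) - 1
    start = len(stem_encoding) - 1
    conditional_logprobs = []
    for offset, cw in enumerate(cw_encoding):
        logprobs = log_softmax(predictions[0, start + offset], dim=-1)
        conditional_logprobs.append(logprobs[cw].item())

    # product of conditional probabilities = exp of the summed log-probs
    return float(torch.tensor(conditional_logprobs).sum().exp())

print(critical_word_prob("The cat sat on the", "The cat sat on the mat"))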
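The diff is truncated right after the first line of sentence_prob_mean, so the committed body is not visible here. One plausible completion, consistent with the function name and its surviving comment (an assumption, not the committed code); it scores a sentence by the mean probability the model assigned to each actual next token:

import torch
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")

def sentence_prob_mean(text):
    # Tokenize the input text and add special tokens
    input_ids = tokenizer.encode(text, return_tensors='pt')
    with torch.no_grad():
        logits = model(input_ids).logits  # (1, seq_len, vocab_size)

    # logits at position i predict the token at position i + 1
    shift_logits = logits[:, :-1, :]
    shift_labels = input_ids[:, 1:]

    probs = softmax(shift_logits, dim=-1)
    # probability assigned to each token that actually follows
    token_probs = probs.gather(2, shift_labels.unsqueeze(-1)).squeeze(-1)
    return token_probs.mean().item()

print(sentence_prob_mean("a man is playing a guitar"))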
|