Spaces:
Sleeping
Sleeping
jonathanjordan21
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import numpy as np
|
|
5 |
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from sentence_transformers.util import cos_sim
|
|
|
8 |
|
9 |
|
10 |
codes = """001 - Vehicle Registration (New)
|
@@ -361,6 +362,8 @@ model_ids = [
|
|
361 |
"sentence-transformers/distiluse-base-multilingual-cased-v2",
|
362 |
"Alibaba-NLP/gte-multilingual-base",
|
363 |
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
|
|
|
|
364 |
]
|
365 |
# model_id = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
|
366 |
# model_id = "Alibaba-NLP/gte-multilingual-base"
|
@@ -368,8 +371,18 @@ model_ids = [
|
|
368 |
# model_id = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
|
369 |
# model_id = "intfloat/multilingual-e5-small"
|
370 |
# model_id = "sentence-transformers/distiluse-base-multilingual-cased-v2"
|
|
|
371 |
model_id = model_ids[-1]
|
372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
|
374 |
# codes_emb = model.encode([x[6:] for x in codes])
|
375 |
codes_emb = model.encode([x["examples"] for x in examples])#.mean(axis=1)
|
@@ -497,9 +510,15 @@ def respond(
|
|
497 |
plates = [" ".join(x).upper() for i,x in enumerate(matches)]
|
498 |
|
499 |
plate_numbers = ", ".join(plates)
|
500 |
-
|
501 |
-
|
502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
|
504 |
scores_argsort = scores.argsort(descending=True)
|
505 |
weights = [18,8,7,6,5,4,3,2,1]
|
|
|
5 |
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from sentence_transformers.util import cos_sim
|
8 |
+
from sentence_transformers import CrossEncoder
|
9 |
|
10 |
|
11 |
codes = """001 - Vehicle Registration (New)
|
|
|
362 |
"sentence-transformers/distiluse-base-multilingual-cased-v2",
|
363 |
"Alibaba-NLP/gte-multilingual-base",
|
364 |
"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
365 |
+
"BAAI/bge-reranker-v2-m3",
|
366 |
+
"jinaai/jina-reranker-v2-base-multilingual"
|
367 |
]
|
368 |
# model_id = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
|
369 |
# model_id = "Alibaba-NLP/gte-multilingual-base"
|
|
|
371 |
# model_id = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
|
372 |
# model_id = "intfloat/multilingual-e5-small"
|
373 |
# model_id = "sentence-transformers/distiluse-base-multilingual-cased-v2"
|
374 |
+
|
375 |
model_id = model_ids[-1]
|
376 |
+
|
377 |
+
if model_id in model_ids[-2:]:
|
378 |
+
model = CrossEncoder(
|
379 |
+
# "jinaai/jina-reranker-v2-base-multilingual",
|
380 |
+
"BAAI/bge-reranker-v2-m3",
|
381 |
+
automodel_args={"torch_dtype": "auto"},
|
382 |
+
trust_remote_code=True,
|
383 |
+
)
|
384 |
+
else:
|
385 |
+
model = SentenceTransformer(model_id, trust_remote_code=True)
|
386 |
|
387 |
# codes_emb = model.encode([x[6:] for x in codes])
|
388 |
codes_emb = model.encode([x["examples"] for x in examples])#.mean(axis=1)
|
|
|
510 |
plates = [" ".join(x).upper() for i,x in enumerate(matches)]
|
511 |
|
512 |
plate_numbers = ", ".join(plates)
|
513 |
+
|
514 |
+
if model.config._name_or_path in model_ids[-2:]:
|
515 |
+
# documents = [v["name"] for v in detail_perhitungan.values()]
|
516 |
+
sentence_pairs = [[message, v["name"]] for v in detail_perhitungan.values()]
|
517 |
+
scores = model.predict(sentence_pairs, convert_to_tensor=True)
|
518 |
+
# scores = [x["score"] for x in model.rank(message, documents)]
|
519 |
+
else:
|
520 |
+
text_emb = model.encode(message)
|
521 |
+
scores = cos_sim(codes_emb, text_emb).mean(axis=-1)#[:,0]
|
522 |
|
523 |
scores_argsort = scores.argsort(descending=True)
|
524 |
weights = [18,8,7,6,5,4,3,2,1]
|