File size: 2,757 Bytes
60ca9f3
 
 
 
 
 
 
 
 
 
 
 
e97003d
60ca9f3
71a8c88
42accf0
2f42a1c
60ca9f3
 
 
71a8c88
 
 
 
 
 
 
 
 
60ca9f3
 
e97003d
60ca9f3
 
 
 
 
 
71a8c88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60ca9f3
 
a0c4644
60ca9f3
21186a6
e97003d
 
 
 
71a8c88
60ca9f3
71a8c88
60ca9f3
 
 
 
 
 
 
 
 
2f42a1c
60ca9f3
 
 
 
 
 
 
e38f762
 
2f42a1c
 
 
 
 
60ca9f3
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from fastapi import FastAPI, Query
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset
from typing import List
import numpy as np
import base64
from PIL import Image
from io import BytesIO

app = FastAPI()


# Load the medicine dataset (train split) from the Hugging Face Hub.
dataset = load_dataset("MohamedAshraf701/medicine-dataset", split="train")
# NOTE(review): an earlier comment claimed the dataset is limited to
# 30,000 entries, but no limiting is performed — the full split is used.


# Fields whose values are concatenated into the text used for embedding
# (see create_combined_text below).
fields_for_embedding = [
    "product_name",
    "sub_category",
    "salt_composition",
    "product_manufactured",
    "medicine_desc",
    "side_effects",
    "drug",
    "brand",
    "effect"
]



# Sentence-embedding model used to encode incoming search queries; the
# dataset's precomputed "embeddings" column is presumably produced by the
# same model — TODO confirm.
model = SentenceTransformer("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")

# Generate Embeddings
def create_combined_text(item, fields=None):
    """
    Combine selected fields of *item* into one space-separated string
    suitable for sentence embedding.

    Parameters
    ----------
    item : dict-like record (e.g. a ``datasets`` row).
    fields : optional iterable of field names to read from *item*;
        defaults to the module-level ``fields_for_embedding`` list,
        preserving the original behavior.

    Returns
    -------
    str : non-empty field values joined by single spaces; list-valued
        fields are flattened to comma-separated strings first.
    """
    if fields is None:
        fields = fields_for_embedding
    parts = []
    for field in fields:
        value = item.get(field)
        if not value:
            # Skip missing or empty values (None, "", []) entirely.
            continue
        if isinstance(value, list):
            # Flatten list-valued fields into a comma-separated string.
            parts.append(", ".join(map(str, value)))
        else:
            parts.append(str(value))
    return " ".join(parts)



# Precomputed embedding vectors from the dataset's "embeddings" column;
# /meds ranks rows by cosine similarity of these against the query embedding.
embeddings = dataset["embeddings"]

@app.get("/gen")
def root():
    """Simple welcome endpoint confirming the API is up."""
    return {"message": "Welcome to the medicine Search API!"}
    
@app.get("/meds")
def search_products(
    query: str = Query("", title="Search Query", description="Search term for medicine"),
    page: int = Query(1, ge=1, title="Page Number"),
    items_per_page: int = Query(10, ge=1, le=100, title="Items Per Page"),
):
    """Rank medicines by semantic similarity to *query* and return one page."""
    if query:
        # Embed the query and order every row by cosine similarity, best first.
        encoded = model.encode(query, convert_to_tensor=True)
        similarity = util.cos_sim(encoded, embeddings).squeeze().tolist()
        order = np.argsort(similarity)[::-1]
    else:
        # No query given: fall back to the dataset's natural order.
        order = np.arange(len(dataset))

    # Pagination bookkeeping (ceiling division for the page count).
    total_items = len(order)
    total_pages = (total_items + items_per_page - 1) // items_per_page
    start = (page - 1) * items_per_page
    page_indices = order[start:start + items_per_page]

    # Materialize only the requested rows, then strip the bulky
    # "embeddings" column from the response payload.
    page_rows = dataset.select(page_indices)
    results = [
        {key: val for key, val in row.items() if key != "embeddings"}
        for row in page_rows
    ]

    return {
        "status": 200,
        "data": results,
        "totalpages": total_pages,
        "currentpage": page,
    }