Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,8 @@ from qdrant_client import QdrantClient
|
|
4 |
from qdrant_client.http.models import Distance, VectorParams
|
5 |
|
6 |
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
|
|
|
|
|
7 |
from qdrant_client import QdrantClient, models
|
8 |
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams
|
9 |
|
@@ -17,54 +19,124 @@ from pydantic import BaseModel, Field
|
|
17 |
class Data(BaseModel):
|
18 |
items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(..., description="Either a dictionary or a list of dictionaries.")
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
document_1 = Document(
|
21 |
-
page_content="
|
22 |
-
metadata={"source": "
|
23 |
)
|
24 |
|
25 |
document_2 = Document(
|
26 |
-
page_content="
|
27 |
-
metadata={"source": "
|
28 |
)
|
29 |
|
30 |
document_3 = Document(
|
31 |
-
page_content="
|
32 |
-
metadata={"source": "
|
33 |
)
|
34 |
|
35 |
document_4 = Document(
|
36 |
-
page_content="
|
37 |
-
metadata={"source": "
|
38 |
)
|
39 |
|
40 |
document_5 = Document(
|
41 |
-
page_content="
|
42 |
-
metadata={"source": "
|
43 |
)
|
44 |
|
45 |
document_6 = Document(
|
46 |
-
page_content="
|
47 |
-
metadata={"source": "
|
48 |
)
|
49 |
|
50 |
document_7 = Document(
|
51 |
-
page_content="
|
52 |
-
metadata={"source": "
|
53 |
)
|
54 |
|
55 |
document_8 = Document(
|
56 |
-
page_content="
|
57 |
-
metadata={"source": "
|
58 |
)
|
59 |
|
60 |
document_9 = Document(
|
61 |
-
page_content="
|
62 |
-
metadata={"source": "
|
63 |
)
|
64 |
|
65 |
document_10 = Document(
|
66 |
-
page_content="
|
67 |
-
metadata={"source": "
|
68 |
)
|
69 |
|
70 |
documents = [
|
@@ -79,29 +151,31 @@ documents = [
|
|
79 |
document_9,
|
80 |
document_10,
|
81 |
]
|
82 |
-
uuids = [str(uuid4()) for _ in range(len(documents))]
|
83 |
|
|
|
84 |
docs = documents
|
85 |
|
86 |
-
|
|
|
|
|
87 |
|
88 |
client = QdrantClient(path="tmp/langchain_qdrant")
|
89 |
|
90 |
# Create a collection with sparse vectors
|
91 |
client.create_collection(
|
92 |
collection_name="my_documents",
|
93 |
-
vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
|
94 |
-
sparse_vectors_config={
|
95 |
-
|
96 |
-
},
|
97 |
)
|
98 |
|
99 |
qdrant = QdrantVectorStore(
|
100 |
client=client,
|
101 |
collection_name="my_documents",
|
102 |
sparse_embedding=sparse_embeddings,
|
103 |
-
retrieval_mode=RetrievalMode.SPARSE,
|
104 |
-
sparse_vector_name="sparse",
|
105 |
|
106 |
)
|
107 |
|
|
|
4 |
from qdrant_client.http.models import Distance, VectorParams
|
5 |
|
6 |
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
|
7 |
+
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
|
8 |
+
|
9 |
from qdrant_client import QdrantClient, models
|
10 |
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams
|
11 |
|
|
|
19 |
class Data(BaseModel):
|
20 |
items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(..., description="Either a dictionary or a list of dictionaries.")
|
21 |
|
22 |
+
# document_1 = Document(
|
23 |
+
# page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
|
24 |
+
# metadata={"source": "tweet"},
|
25 |
+
# )
|
26 |
+
|
27 |
+
# document_2 = Document(
|
28 |
+
# page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
|
29 |
+
# metadata={"source": "news"},
|
30 |
+
# )
|
31 |
+
|
32 |
+
# document_3 = Document(
|
33 |
+
# page_content="Building an exciting new project with LangChain - come check it out!",
|
34 |
+
# metadata={"source": "tweet"},
|
35 |
+
# )
|
36 |
+
|
37 |
+
# document_4 = Document(
|
38 |
+
# page_content="Robbers broke into the city bank and stole $1 million in cash.",
|
39 |
+
# metadata={"source": "news"},
|
40 |
+
# )
|
41 |
+
|
42 |
+
# document_5 = Document(
|
43 |
+
# page_content="Wow! That was an amazing movie. I can't wait to see it again.",
|
44 |
+
# metadata={"source": "tweet"},
|
45 |
+
# )
|
46 |
+
|
47 |
+
# document_6 = Document(
|
48 |
+
# page_content="Is the new iPhone worth the price? Read this review to find out.",
|
49 |
+
# metadata={"source": "website"},
|
50 |
+
# )
|
51 |
+
|
52 |
+
# document_7 = Document(
|
53 |
+
# page_content="The top 10 soccer players in the world right now.",
|
54 |
+
# metadata={"source": "website"},
|
55 |
+
# )
|
56 |
+
|
57 |
+
# document_8 = Document(
|
58 |
+
# page_content="LangGraph is the best framework for building stateful, agentic applications!",
|
59 |
+
# metadata={"source": "tweet"},
|
60 |
+
# )
|
61 |
+
|
62 |
+
# document_9 = Document(
|
63 |
+
# page_content="The stock market is down 500 points today due to fears of a recession.",
|
64 |
+
# metadata={"source": "news"},
|
65 |
+
# )
|
66 |
+
|
67 |
+
# document_10 = Document(
|
68 |
+
# page_content="I have a bad feeling I am going to get deleted :(",
|
69 |
+
# metadata={"source": "tweet"},
|
70 |
+
# )
|
71 |
+
|
72 |
+
# documents = [
|
73 |
+
# document_1,
|
74 |
+
# document_2,
|
75 |
+
# document_3,
|
76 |
+
# document_4,
|
77 |
+
# document_5,
|
78 |
+
# document_6,
|
79 |
+
# document_7,
|
80 |
+
# document_8,
|
81 |
+
# document_9,
|
82 |
+
# document_10,
|
83 |
+
# ]
|
84 |
+
# uuids = [str(uuid4()) for _ in range(len(documents))]
|
85 |
+
|
86 |
+
# docs = documents
|
87 |
+
|
88 |
+
|
89 |
+
# from uuid import uuid4
|
90 |
+
# from langchain.schema import Document
|
91 |
+
|
92 |
document_1 = Document(
|
93 |
+
page_content="Aduan: Saya tidak bisa login ke sistem e-learning.\nJawaban: Kami menemukan bahwa akun Anda terkunci setelah tiga kali gagal login. Kami telah membuka kunci akun dan menyarankan Anda untuk melakukan reset password menggunakan fitur 'Lupa Kata Sandi'.",
|
94 |
+
metadata={"source": "Aduan"},
|
95 |
)
|
96 |
|
97 |
document_2 = Document(
|
98 |
+
page_content="Request: Mohon bantuannya untuk mendapatkan akses ke folder tim di server.\nJawaban: Kami telah menambahkan akun Anda ke grup pengguna 'Tim IT' di Active Directory. Akses ke folder sekarang dapat dilakukan setelah Anda login ulang.",
|
99 |
+
metadata={"source": "Request"},
|
100 |
)
|
101 |
|
102 |
document_3 = Document(
|
103 |
+
page_content="Incident: Laporan printer di lantai 3 tidak bisa mencetak.\nJawaban: Kami melakukan restart pada spooler service di perangkat printer dan membersihkan antrian cetak yang bermasalah. Printer sudah kembali normal.",
|
104 |
+
metadata={"source": "Incident"},
|
105 |
)
|
106 |
|
107 |
document_4 = Document(
|
108 |
+
page_content="Aduan: Email saya sering masuk ke folder spam penerima.\nJawaban: Kami periksa konfigurasi SPF, DKIM, dan DMARC pada domain Anda. Ternyata ada konfigurasi SPF yang tidak lengkap. Kami telah memperbaikinya dan hasil pengujian sudah menunjukkan pengiriman email berjalan normal.",
|
109 |
+
metadata={"source": "Aduan"},
|
110 |
)
|
111 |
|
112 |
document_5 = Document(
|
113 |
+
page_content="Request: Saya membutuhkan instalasi software AutoCAD untuk proyek desain.\nJawaban: Kami telah mengunduh versi terbaru dari situs resmi AutoDesk dan melakukan instalasi di laptop Anda. Lisensi telah diaktivasi menggunakan akun universitas.",
|
114 |
+
metadata={"source": "Request"},
|
115 |
)
|
116 |
|
117 |
document_6 = Document(
|
118 |
+
page_content="Incident: Sistem ERP tidak bisa mengakses modul keuangan sejak pagi.\nJawaban: Kami temukan bahwa service database MySQL berhenti secara tiba-tiba. Service telah kami nyalakan kembali dan modul keuangan kini dapat diakses kembali.",
|
119 |
+
metadata={"source": "Incident"},
|
120 |
)
|
121 |
|
122 |
document_7 = Document(
|
123 |
+
page_content="Aduan: Aplikasi mobile sering crash saat dibuka.\nJawaban: Kami analisis log error dan menemukan bug pada fitur notifikasi. Kami telah melakukan patch pada versi 1.2.3 dan memperbarui aplikasi Anda melalui MDM.",
|
124 |
+
metadata={"source": "Aduan"},
|
125 |
)
|
126 |
|
127 |
document_8 = Document(
|
128 |
+
page_content="Request: Mohon dibuatkan email dinas baru untuk staf baru di departemen HR.\nJawaban: Email telah dibuat dengan format [email protected] dan password default. Informasi login telah kami kirimkan melalui email pribadi yang terdaftar.",
|
129 |
+
metadata={"source": "Request"},
|
130 |
)
|
131 |
|
132 |
document_9 = Document(
|
133 |
+
page_content="Incident: Koneksi internet putus-putus di gedung B.\nJawaban: Kami lakukan pengecekan router dan mengganti kabel jaringan yang rusak di lantai 2. Koneksi kini stabil dan normal.",
|
134 |
+
metadata={"source": "Incident"},
|
135 |
)
|
136 |
|
137 |
document_10 = Document(
|
138 |
+
page_content="Aduan: Layar laptop saya berkedip-kedip.\nJawaban: Masalah disebabkan oleh driver grafis yang tidak kompatibel. Kami telah menginstal versi driver yang sesuai dengan perangkat Anda dan masalah layar sudah tidak muncul lagi.",
|
139 |
+
metadata={"source": "Aduan"},
|
140 |
)
|
141 |
|
142 |
documents = [
|
|
|
151 |
document_9,
|
152 |
document_10,
|
153 |
]
|
|
|
154 |
|
155 |
+
uuids = [str(uuid4()) for _ in range(len(documents))]
|
156 |
docs = documents
|
157 |
|
158 |
+
|
159 |
+
# sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
|
160 |
+
sparse_embeddings = FastEmbedEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
161 |
|
162 |
client = QdrantClient(path="tmp/langchain_qdrant")
|
163 |
|
164 |
# Create a collection with sparse vectors
|
165 |
client.create_collection(
|
166 |
collection_name="my_documents",
|
167 |
+
# vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
|
168 |
+
# sparse_vectors_config={
|
169 |
+
# "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
|
170 |
+
# },
|
171 |
)
|
172 |
|
173 |
qdrant = QdrantVectorStore(
|
174 |
client=client,
|
175 |
collection_name="my_documents",
|
176 |
sparse_embedding=sparse_embeddings,
|
177 |
+
# retrieval_mode=RetrievalMode.SPARSE,
|
178 |
+
# sparse_vector_name="sparse",
|
179 |
|
180 |
)
|
181 |
|