jonathanjordan21 committed on
Commit
109534c
·
verified ·
1 Parent(s): 20469f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -28
app.py CHANGED
@@ -4,6 +4,8 @@ from qdrant_client import QdrantClient
4
  from qdrant_client.http.models import Distance, VectorParams
5
 
6
  from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
 
 
7
  from qdrant_client import QdrantClient, models
8
  from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams
9
 
@@ -17,54 +19,124 @@ from pydantic import BaseModel, Field
17
  class Data(BaseModel):
18
  items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(..., description="Either a dictionary or a list of dictionaries.")
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  document_1 = Document(
21
- page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
22
- metadata={"source": "tweet"},
23
  )
24
 
25
  document_2 = Document(
26
- page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
27
- metadata={"source": "news"},
28
  )
29
 
30
  document_3 = Document(
31
- page_content="Building an exciting new project with LangChain - come check it out!",
32
- metadata={"source": "tweet"},
33
  )
34
 
35
  document_4 = Document(
36
- page_content="Robbers broke into the city bank and stole $1 million in cash.",
37
- metadata={"source": "news"},
38
  )
39
 
40
  document_5 = Document(
41
- page_content="Wow! That was an amazing movie. I can't wait to see it again.",
42
- metadata={"source": "tweet"},
43
  )
44
 
45
  document_6 = Document(
46
- page_content="Is the new iPhone worth the price? Read this review to find out.",
47
- metadata={"source": "website"},
48
  )
49
 
50
  document_7 = Document(
51
- page_content="The top 10 soccer players in the world right now.",
52
- metadata={"source": "website"},
53
  )
54
 
55
  document_8 = Document(
56
- page_content="LangGraph is the best framework for building stateful, agentic applications!",
57
- metadata={"source": "tweet"},
58
  )
59
 
60
  document_9 = Document(
61
- page_content="The stock market is down 500 points today due to fears of a recession.",
62
- metadata={"source": "news"},
63
  )
64
 
65
  document_10 = Document(
66
- page_content="I have a bad feeling I am going to get deleted :(",
67
- metadata={"source": "tweet"},
68
  )
69
 
70
  documents = [
@@ -79,29 +151,31 @@ documents = [
79
  document_9,
80
  document_10,
81
  ]
82
- uuids = [str(uuid4()) for _ in range(len(documents))]
83
 
 
84
  docs = documents
85
 
86
- sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
 
 
87
 
88
  client = QdrantClient(path="tmp/langchain_qdrant")
89
 
90
  # Create a collection with sparse vectors
91
  client.create_collection(
92
  collection_name="my_documents",
93
- vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
94
- sparse_vectors_config={
95
- "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
96
- },
97
  )
98
 
99
  qdrant = QdrantVectorStore(
100
  client=client,
101
  collection_name="my_documents",
102
  sparse_embedding=sparse_embeddings,
103
- retrieval_mode=RetrievalMode.SPARSE,
104
- sparse_vector_name="sparse",
105
 
106
  )
107
 
 
4
  from qdrant_client.http.models import Distance, VectorParams
5
 
6
  from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
7
+ from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
8
+
9
  from qdrant_client import QdrantClient, models
10
  from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams
11
 
 
19
  class Data(BaseModel):
20
  items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(..., description="Either a dictionary or a list of dictionaries.")
21
 
22
+ # document_1 = Document(
23
+ # page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
24
+ # metadata={"source": "tweet"},
25
+ # )
26
+
27
+ # document_2 = Document(
28
+ # page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
29
+ # metadata={"source": "news"},
30
+ # )
31
+
32
+ # document_3 = Document(
33
+ # page_content="Building an exciting new project with LangChain - come check it out!",
34
+ # metadata={"source": "tweet"},
35
+ # )
36
+
37
+ # document_4 = Document(
38
+ # page_content="Robbers broke into the city bank and stole $1 million in cash.",
39
+ # metadata={"source": "news"},
40
+ # )
41
+
42
+ # document_5 = Document(
43
+ # page_content="Wow! That was an amazing movie. I can't wait to see it again.",
44
+ # metadata={"source": "tweet"},
45
+ # )
46
+
47
+ # document_6 = Document(
48
+ # page_content="Is the new iPhone worth the price? Read this review to find out.",
49
+ # metadata={"source": "website"},
50
+ # )
51
+
52
+ # document_7 = Document(
53
+ # page_content="The top 10 soccer players in the world right now.",
54
+ # metadata={"source": "website"},
55
+ # )
56
+
57
+ # document_8 = Document(
58
+ # page_content="LangGraph is the best framework for building stateful, agentic applications!",
59
+ # metadata={"source": "tweet"},
60
+ # )
61
+
62
+ # document_9 = Document(
63
+ # page_content="The stock market is down 500 points today due to fears of a recession.",
64
+ # metadata={"source": "news"},
65
+ # )
66
+
67
+ # document_10 = Document(
68
+ # page_content="I have a bad feeling I am going to get deleted :(",
69
+ # metadata={"source": "tweet"},
70
+ # )
71
+
72
+ # documents = [
73
+ # document_1,
74
+ # document_2,
75
+ # document_3,
76
+ # document_4,
77
+ # document_5,
78
+ # document_6,
79
+ # document_7,
80
+ # document_8,
81
+ # document_9,
82
+ # document_10,
83
+ # ]
84
+ # uuids = [str(uuid4()) for _ in range(len(documents))]
85
+
86
+ # docs = documents
87
+
88
+
89
+ # from uuid import uuid4
90
+ # from langchain.schema import Document
91
+
92
  document_1 = Document(
93
+ page_content="Aduan: Saya tidak bisa login ke sistem e-learning.\nJawaban: Kami menemukan bahwa akun Anda terkunci setelah tiga kali gagal login. Kami telah membuka kunci akun dan menyarankan Anda untuk melakukan reset password menggunakan fitur 'Lupa Kata Sandi'.",
94
+ metadata={"source": "Aduan"},
95
  )
96
 
97
  document_2 = Document(
98
+ page_content="Request: Mohon bantuannya untuk mendapatkan akses ke folder tim di server.\nJawaban: Kami telah menambahkan akun Anda ke grup pengguna 'Tim IT' di Active Directory. Akses ke folder sekarang dapat dilakukan setelah Anda login ulang.",
99
+ metadata={"source": "Request"},
100
  )
101
 
102
  document_3 = Document(
103
+ page_content="Incident: Laporan printer di lantai 3 tidak bisa mencetak.\nJawaban: Kami melakukan restart pada spooler service di perangkat printer dan membersihkan antrian cetak yang bermasalah. Printer sudah kembali normal.",
104
+ metadata={"source": "Incident"},
105
  )
106
 
107
  document_4 = Document(
108
+ page_content="Aduan: Email saya sering masuk ke folder spam penerima.\nJawaban: Kami periksa konfigurasi SPF, DKIM, dan DMARC pada domain Anda. Ternyata ada konfigurasi SPF yang tidak lengkap. Kami telah memperbaikinya dan hasil pengujian sudah menunjukkan pengiriman email berjalan normal.",
109
+ metadata={"source": "Aduan"},
110
  )
111
 
112
  document_5 = Document(
113
+ page_content="Request: Saya membutuhkan instalasi software AutoCAD untuk proyek desain.\nJawaban: Kami telah mengunduh versi terbaru dari situs resmi AutoDesk dan melakukan instalasi di laptop Anda. Lisensi telah diaktivasi menggunakan akun universitas.",
114
+ metadata={"source": "Request"},
115
  )
116
 
117
  document_6 = Document(
118
+ page_content="Incident: Sistem ERP tidak bisa mengakses modul keuangan sejak pagi.\nJawaban: Kami temukan bahwa service database MySQL berhenti secara tiba-tiba. Service telah kami nyalakan kembali dan modul keuangan kini dapat diakses kembali.",
119
+ metadata={"source": "Incident"},
120
  )
121
 
122
  document_7 = Document(
123
+ page_content="Aduan: Aplikasi mobile sering crash saat dibuka.\nJawaban: Kami analisis log error dan menemukan bug pada fitur notifikasi. Kami telah melakukan patch pada versi 1.2.3 dan memperbarui aplikasi Anda melalui MDM.",
124
+ metadata={"source": "Aduan"},
125
  )
126
 
127
  document_8 = Document(
128
+ page_content="Request: Mohon dibuatkan email dinas baru untuk staf baru di departemen HR.\nJawaban: Email telah dibuat dengan format [email protected] dan password default. Informasi login telah kami kirimkan melalui email pribadi yang terdaftar.",
129
+ metadata={"source": "Request"},
130
  )
131
 
132
  document_9 = Document(
133
+ page_content="Incident: Koneksi internet putus-putus di gedung B.\nJawaban: Kami lakukan pengecekan router dan mengganti kabel jaringan yang rusak di lantai 2. Koneksi kini stabil dan normal.",
134
+ metadata={"source": "Incident"},
135
  )
136
 
137
  document_10 = Document(
138
+ page_content="Aduan: Layar laptop saya berkedip-kedip.\nJawaban: Masalah disebabkan oleh driver grafis yang tidak kompatibel. Kami telah menginstal versi driver yang sesuai dengan perangkat Anda dan masalah layar sudah tidak muncul lagi.",
139
+ metadata={"source": "Aduan"},
140
  )
141
 
142
  documents = [
 
151
  document_9,
152
  document_10,
153
  ]
 
154
 
155
+ uuids = [str(uuid4()) for _ in range(len(documents))]
156
  docs = documents
157
 
158
+
159
+ # sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
160
+ sparse_embeddings = FastEmbedEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
161
 
162
  client = QdrantClient(path="tmp/langchain_qdrant")
163
 
164
  # Create a collection with sparse vectors
165
  client.create_collection(
166
  collection_name="my_documents",
167
+ # vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
168
+ # sparse_vectors_config={
169
+ # "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
170
+ # },
171
  )
172
 
173
  qdrant = QdrantVectorStore(
174
  client=client,
175
  collection_name="my_documents",
176
  sparse_embedding=sparse_embeddings,
177
+ # retrieval_mode=RetrievalMode.SPARSE,
178
+ # sparse_vector_name="sparse",
179
 
180
  )
181