RohanVashisht committed on
Commit
2401907
·
verified ·
1 Parent(s): f33755e

Upload folder using huggingface_hub

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Zigistry.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__/
2
+ PERSIST_DIR/
3
+ .gradio/
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
  title: Zigister
3
- emoji:
4
- colorFrom: red
5
- colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.16.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Zigister
3
+ emoji: 🦀
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.16.0
8
+ app_file: main.py
9
+ pinned: true
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Zigistry.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbedcfc405d05e37a1ebcf60c2e167555c80ef88901bc835e5e97e2045c09d31
3
+ size 517396
main.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Main script to run the LLM chatbot using the Zigistry framework

This script performs the following tasks:
1. Perform pre-requisites check
2. Configure LLM and embedding model
3. Data ingestion
4. Query handling
5. Launch the chatbot interface
"""

# --- Importing required libraries ---
import sys

from zigistry import constants
from zigistry import pre_requisite
import gradio as gr
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    VectorStoreIndex,
    SimpleDirectoryReader,
    ChatPromptTemplate,
    Settings,
)
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# -------------------------------------


# --- Perform pre-requisites and pre-requisite check ---
pre_requisite.performPreRequisites()

if not pre_requisite.CorrectlyCompletedPreRequisites():
    # NOTE: the bare `exit()` builtin is injected by the `site` module and is
    # not guaranteed to exist (e.g. under `python -S`); sys.exit is the
    # reliable way to terminate with a non-zero status.
    sys.exit(1)
# ------------------------------------------------------


# --- Configure LLM and embedding model ---
Settings.llm = HuggingFaceInferenceAPI(
    model_name=constants.LLM,
    tokenizer_name=constants.LLM,
    context_window=3000,
    token=constants.HF_TOKEN,
    max_new_tokens=512,
    generate_kwargs={"temperature": constants.TEMPERATURE},
)

Settings.embed_model = HuggingFaceEmbedding(model_name=constants.EMBEDDER)
# -----------------------------------------
52
def data_ingestion():
    """
    Read the configured input files and persist a vector index to disk.

    Builds a VectorStoreIndex from the documents listed in constants.FILES
    and writes its storage context to constants.PERSIST_DIR.
    """
    reader = SimpleDirectoryReader(input_files=constants.FILES)
    docs = reader.load_data()
    vector_index = VectorStoreIndex.from_documents(docs)
    vector_index.storage_context.persist(persist_dir=constants.PERSIST_DIR)
59
+
60
+
61
def handle_query(query):
    """
    Answer *query* against the persisted index and return the response text.

    Loads the index from constants.PERSIST_DIR, queries it with the project
    prompt template, and extracts a plain-text response from the result.
    """
    ctx = StorageContext.from_defaults(persist_dir=constants.PERSIST_DIR)
    idx = load_index_from_storage(ctx)
    qa_template = ChatPromptTemplate.from_messages(constants.LLM_RESPONSE_FORMAT)
    engine = idx.as_query_engine(text_qa_template=qa_template)

    result = engine.query(query)
    # The query result may expose the text either as an attribute or as a
    # dict entry; fall back to a fixed apology when neither is present.
    if hasattr(result, "response"):
        return result.response
    if isinstance(result, dict) and "response" in result:
        return result["response"]
    return "Sorry, I couldn't find an answer."
76
+
77
+
78
if __name__ == "__main__":
    data_ingestion()

    # --- Launch the chatbot interface ---
    chatbot = gr.Interface(
        fn=handle_query,
        inputs="text",
        outputs="text",
        title="LLM Chatbot",
        flagging_mode="never",
    )
    chatbot.launch()
    # -------------------------------------
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ llama_index
3
+ llama-index-llms-huggingface
4
+ llama-index-llms-huggingface-api
5
+ llama-index-embeddings-huggingface
6
+ llama-index-embeddings-instructor
zigistry/__init__.py ADDED
File without changes
zigistry/constants.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv

# Load the .env file BEFORE announcing it and before reading HF_TOKEN below.
# (The original printed the message first, before anything was loaded.)
load_dotenv()
print("Loaded environment variables.")

# Directory where the llama-index storage context is persisted between runs.
PERSIST_DIR = "PERSIST_DIR"
# Hugging Face model id used for chat completion.
LLM = "google/gemma-1.1-7b-it"
# Hugging Face model id used for embeddings.
EMBEDDER = "BAAI/bge-small-en-v1.5"
# May be None when unset — pre_requisite.CorrectlyCompletedPreRequisites checks.
HF_TOKEN = os.getenv("HF_TOKEN")
TEMPERATURE = 0.1
# Input documents ingested into the vector index.
FILES = ["Zigistry.pdf"]

# (role, template) message list consumed by ChatPromptTemplate.from_messages;
# {context_str} and {query_str} are filled in by the query engine.
LLM_RESPONSE_FORMAT = [
    (
        "user",
        """You are Zigister - Q&A Assistant

You are Zigister, a Q&A assistant created by Zigistry. Your primary role is to provide accurate and relevant answers based on the given context and instructions.

Remember to always provide clear and concise responses to user queries. If you are unsure about an answer, it is better to admit it than to provide incorrect information.

When asked on data, try responding with proper github links whenever possible.

Remember, https://github.com/ + the full_name is the github link.

When asked about your creator or organization, respond with:

"I was created by Zigistry.dev, A place where you can find all the libraries and programs that suits your Zig lang needs, making it more accessible and easier to use for all."
Also, zigistry.dev is the website with the latest data for packages and programs.

For all other inquiries, ensure your responses align with the provided context. If a question falls outside your scope, politely guide the user to ask within the relevant context.
Context:
{context_str}
Question:
{query_str}
""",
    )
]
zigistry/pre_requisite.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from zigistry import constants
2
+ import os
3
+ from logging import error
4
+
5
+
6
def CorrectlyCompletedPreRequisites() -> bool:
    """
    Return True when every pre-requisite for indexing is satisfied.

    Checks that each configured input file exists, that the persistence
    directory is present, and that a Hugging Face token is set; logs an
    error and returns False on the first failure.
    """
    for i in constants.FILES:
        if not os.path.exists(i):
            error(f"File {i} does not exist")
            return False
    if not os.path.exists(constants.PERSIST_DIR):
        error(f"Directory {constants.PERSIST_DIR} does not exist")
        return False
    # PEP 8 / E711: compare against None with `is`, not `==`.
    if constants.HF_TOKEN is None:
        error("Hugging Face token not set")
        return False
    return True
18
+
19
+
20
def performPreRequisites():
    """Ensure the persistence directory exists before indexing runs."""
    target_dir = constants.PERSIST_DIR
    os.makedirs(target_dir, exist_ok=True)