ishans24 committed on
Commit
32b96c3
1 Parent(s): 529ea31

Update util.py

Browse files
Files changed (1) hide show
  1. util.py +50 -50
util.py CHANGED
@@ -1,51 +1,51 @@
1
- import os
2
- import json
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain_community.vectorstores.faiss import FAISS
5
- from langchain.chains.question_answering import load_qa_chain
6
- from langchain.prompts import PromptTemplate
7
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
8
- from langchain_google_genai import ChatGoogleGenerativeAI
9
- import google.generativeai as genai
10
- from dotenv import load_dotenv
11
- load_dotenv()
12
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
13
-
14
- def extract_text(json_path):
15
- with open(json_path, 'r') as file:
16
- data = json.load(file)
17
-
18
- text = ""
19
- for professor in data['professors']:
20
- professor_id = professor.get('professor_id')
21
- name = professor.get('name')
22
- course = professor.get('course')
23
- reviews = professor.get('reviews', [])
24
-
25
- text += f'\nProfessor ID: {professor_id}, Professor Name: {name}, Course: {course}\n '
26
- if reviews:
27
- for review in reviews:
28
- rating = review.get('rating')
29
- review_text = review.get('review_text')
30
- text += f"Rating: {rating}, Review: {review_text}\n"
31
- else:
32
- print("No reviews available.")
33
- return text
34
-
35
- def split_text_into_chunks(text):
36
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
37
- text_chunks = splitter.split_text(text)
38
- return text_chunks
39
-
40
- def create_vector_store(chunks):
41
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
42
- vector_store = FAISS.from_texts(chunks, embedding=embeddings)
43
- vector_store.save_local("reviews_index")
44
-
45
- def main(json_path):
46
- text = extract_text(json_path)
47
- chunks = split_text_into_chunks(text)
48
- create_vector_store(chunks)
49
-
50
- json_path = 'reviews.json'
51
  main(json_path)
 
1
+ import os
2
+ import json
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores.faiss import FAISS
5
+ from langchain.chains.question_answering import load_qa_chain
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ import google.generativeai as genai
10
+ from dotenv import load_dotenv
11
+
12
def extract_text(json_path):
    """Flatten the professor-review JSON file into one plain-text document.

    The file must contain an object with a top-level ``professors`` list.
    Each entry contributes a header line built from its ``professor_id``,
    ``name`` and ``course`` values, followed by one
    ``Rating: ..., Review: ...`` line per item of its ``reviews`` list.
    Fields that are absent are rendered as ``None``. When an entry has no
    reviews, ``"No reviews available."`` is printed to stdout and only the
    header is emitted for it.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The concatenated text for every professor, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the top-level ``professors`` key is missing.
    """
    # Decode explicitly as UTF-8 so non-ASCII review text is read the same
    # way on every platform instead of depending on the locale default.
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Collect the fragments and join once at the end rather than growing a
    # string with repeated ``+=``.
    parts = []
    for professor in data['professors']:
        professor_id = professor.get('professor_id')
        name = professor.get('name')
        course = professor.get('course')
        reviews = professor.get('reviews', [])

        parts.append(
            f'\nProfessor ID: {professor_id}, Professor Name: {name}, Course: {course}\n '
        )
        if reviews:
            for review in reviews:
                rating = review.get('rating')
                review_text = review.get('review_text')
                parts.append(f"Rating: {rating}, Review: {review_text}\n")
        else:
            print("No reviews available.")
    return "".join(parts)
32
+
33
def split_text_into_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split ``text`` into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The full document string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``. Defaults to
            1000, the value that was previously hard-coded.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to 200, the value that was previously hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the chunk strings).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
37
+
38
def create_vector_store(chunks, index_path="reviews_index", model="models/embedding-001"):
    """Embed ``chunks`` and save the resulting FAISS index to disk.

    Args:
        chunks: Iterable of text chunks to embed. Must not be empty.
        index_path: Location passed to ``save_local``. Defaults to
            ``"reviews_index"``, the value that was previously hard-coded.
        model: Embedding model name passed to
            ``GoogleGenerativeAIEmbeddings``. Defaults to
            ``"models/embedding-001"``, the value that was previously
            hard-coded.

    Raises:
        ValueError: If ``chunks`` contains no items.
    """
    # Materialize once so generators are supported and emptiness can be
    # checked before any embedding request is made.
    chunks = list(chunks)
    if not chunks:
        # Fail fast with a clear message instead of letting the index build
        # fail further down with a less helpful error.
        raise ValueError("Cannot create a vector store from an empty list of chunks.")

    embeddings = GoogleGenerativeAIEmbeddings(model=model)
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local(index_path)
42
+
43
def main(json_path):
    """Build and save the review index from the JSON file at ``json_path``.

    Loads environment variables with ``load_dotenv``, configures the Google
    Generative AI client with the ``GOOGLE_API_KEY`` environment value, then
    extracts the review text, splits it into chunks and stores the chunks in
    the vector store.
    """
    # Environment and client configuration happen here, at call time.
    load_dotenv()
    api_key = os.getenv("GOOGLE_API_KEY")
    genai.configure(api_key=api_key)

    # Extract -> split -> embed, each stage feeding the next.
    create_vector_store(split_text_into_chunks(extract_text(json_path)))


# These statements sit at module level with no ``__main__`` guard, so they
# run whenever this module is executed or imported.
json_path = 'reviews.json'
main(json_path)