File size: 1,842 Bytes
32b96c3
 
 
 
 
 
 
 
 
b3716e2
 
 
424c49f
 
32b96c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a3eb0b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from dotenv import load_dotenv
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


def extract_text(json_path):
    with open(json_path, 'r') as file:
        data = json.load(file)
        
    text = ""
    for professor in data['professors']:
        professor_id = professor.get('professor_id')
        name = professor.get('name')
        course = professor.get('course')
        reviews = professor.get('reviews', [])
                
        text += f'\nProfessor ID: {professor_id}, Professor Name: {name}, Course: {course}\n '
        if reviews:
            for review in reviews:
                rating = review.get('rating')
                review_text = review.get('review_text')
                text += f"Rating: {rating}, Review: {review_text}\n"
        else:
            print("No reviews available.")
    return text

def split_text_into_chunks(text):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    text_chunks = splitter.split_text(text)
    return text_chunks

def create_vector_store(chunks):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local("reviews_index")

def main(json_path):
    text = extract_text(json_path)
    chunks = split_text_into_chunks(text)
    create_vector_store(chunks)

json_path = 'reviews.json'
main(json_path)