import os
import string
from typing import Any, Dict, List, Tuple, Union

import chromadb
import numpy as np
import openai
import pandas as pd
import requests
import streamlit as st
from datasets import load_dataset
from langchain.document_loaders import TextLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from scipy.spatial.distance import cosine

openai.api_key = os.environ["OPENAI_API_KEY"]


def call_chatgpt(prompt: str) -> str:
    """
    Uses the OpenAI API to generate a response to a prompt.

    Args:
        prompt: A string representing the prompt to send to the OpenAI API.

    Returns:
        A string representing the AI's generated response.
    """
    # Legacy (pre-1.0) OpenAI completions API, using the instruction-tuned
    # gpt-3.5-turbo-instruct model.
    response = openai.Completion.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0.5,
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    ans = response.choices[0]["text"]

    return ans
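
# Hypothetical usage sketch (not part of the original module): the prompt is
# illustrative, and this assumes OPENAI_API_KEY is set in the environment and
# the legacy (pre-1.0) openai client is installed. Left commented out so that
# importing the module stays side-effect free.
#
#   answer = call_chatgpt("Explain cosine similarity in one sentence.")
#   print(answer)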


def openai_text_embedding(prompt: str) -> List[float]:
    """Returns the text-embedding-ada-002 embedding vector for the input text."""
    return openai.Embedding.create(input=prompt, model="text-embedding-ada-002")[
        "data"
    ][0]["embedding"]


def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
    """
    Computes a semantic textual similarity (STS) score for two sentences as
    the cosine similarity between their OpenAI embeddings.

    Args:
        sentence1: The first sentence.
        sentence2: The second sentence.

    Returns:
        A float similarity score; higher values indicate closer meaning.
    """
    embedding1 = openai_text_embedding(sentence1)
    embedding2 = openai_text_embedding(sentence2)

    embedding1 = np.asarray(embedding1)
    embedding2 = np.asarray(embedding2)

    # scipy's cosine() returns the cosine *distance*, so subtract from 1 to
    # recover cosine similarity.
    similarity_score = 1 - cosine(embedding1, embedding2)

    return similarity_score
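
# Illustrative check (hypothetical sentences, not from the original module):
# near-paraphrases should score close to 1.0, unrelated sentences noticeably
# lower. Requires a valid OPENAI_API_KEY, so it is left commented out.
#
#   calculate_sts_openai_score(
#       "A cat sits on the mat.", "A cat is sitting on a mat."
#   )  # near-paraphrase -> high score
#   calculate_sts_openai_score(
#       "A cat sits on the mat.", "Stock markets fell sharply today."
#   )  # unrelated -> lower score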


def query(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sends a JSON payload to a predefined API URL and returns the JSON response.

    Args:
        payload (Dict[str, Any]): The JSON payload to send to the API.

    Returns:
        Dict[str, Any]: The JSON response received from the API.
    """
    # Hugging Face Inference Endpoint serving the Llama 2 model used below.
    API_URL = "https://sks7h7h5qkhoxwxo.us-east-1.aws.endpoints.huggingface.cloud"

    headers = {"Accept": "application/json", "Content-Type": "application/json"}

    response = requests.post(API_URL, headers=headers, json=payload)

    return response.json()


def llama2_7b_ysa(prompt: str) -> str:
    """
    Queries a Llama 2 7B model endpoint and retrieves the generated text for
    the given prompt.

    This function sends the prompt to the model via the `query` helper and
    extracts the generated text from the response, which is expected to be a
    list of dictionaries whose first element contains a 'generated_text' key.

    Parameters:
    - prompt (str): The text prompt to send to the model.

    Returns:
    - str: The generated text response from the model.

    Note:
    - The `query` function must be defined and accessible within the same
      scope or module; it sends the request to the model endpoint and returns
      the response in a structured format.
    - The 'parameters' dictionary caps generation at 200 new tokens and can be
      extended with other request parameters supported by the model API.
    """
    query_payload: Dict[str, Any] = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 200},
    }

    output = query(query_payload)

    # The endpoint returns a list of generation records; take the first one.
    response: str = output[0]["generated_text"]

    return response
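

# Hypothetical smoke test (not part of the original module): the prompt is
# illustrative only, and this assumes the Hugging Face endpoint above is
# running and OPENAI_API_KEY is set in the environment.
if __name__ == "__main__":
    demo_prompt = "What services does the organization provide?"
    generated = llama2_7b_ysa(demo_prompt)
    print(generated)
    # Compare the generated answer against the prompt with the STS helper.
    print(calculate_sts_openai_score(demo_prompt, generated))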