Spaces:
Sleeping
Sleeping
import datetime | |
from urllib.request import Request, urlopen | |
from pypdf import PdfReader | |
from io import StringIO | |
import io | |
import pandas as pd | |
import os | |
import torch | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from transformers import pipeline | |
from openai import OpenAI | |
from groq import Groq | |
import time | |
import json | |
from openai import OpenAI | |
# The OpenAI credential is taken from the environment so that no secret lives
# in the source tree. When OPENAI_API_KEY is unset, ``openai_key`` is None and
# the OpenAI client raises at construction time instead of failing later on
# the first request.
# NOTE(review): a key was previously embedded in this file; treat it as
# exposed and revoke it.
openai_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=openai_key)

# Accepted announcement descriptors: the first column of Descriptor.xlsx, read
# without a header row. Positions in this list are used by ``summary`` to pick
# the matching prompt, so the order of the sheet matters.
desc = pd.read_excel('Descriptor.xlsx', header=None)
desc_list = desc.iloc[:, 0].to_list()
def callAzure(prompt, text):
    """Ask the Azure-hosted Llama 3 endpoint to answer ``prompt`` about ``text``.

    The two arguments are joined with a single space and sent as one user
    message; at most 1000 tokens are requested for the reply.

    Args:
        prompt: Instruction placed at the start of the message.
        text: Material the instruction applies to.

    Returns:
        The content of the first choice in the chat-completion response.

    Raises:
        RuntimeError: If the ``AZURE_LLAMA_API_KEY`` environment variable is
            not set.
    """
    # The key is read from the environment rather than stored in source.
    # Fail here explicitly: handing ``None`` to the OpenAI client would make
    # it fall back to OPENAI_API_KEY and send that key to this endpoint.
    api_key = os.environ.get("AZURE_LLAMA_API_KEY")
    if not api_key:
        raise RuntimeError(
            "AZURE_LLAMA_API_KEY environment variable is not set"
        )

    url = "https://Meta-Llama-3-70B-Instruct-fkqip-serverless.eastus2.inference.ai.azure.com"
    # Named distinctly so it is not confused with the module-level ``client``.
    azure_client = OpenAI(base_url=url, api_key=api_key)

    msg = "{} {}".format(prompt, text)
    response = azure_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": msg,
            }
        ],
        model="azureai",
        max_tokens=1000,
    )
    return response.choices[0].message.content
def filter(input_json):
    """Validate an announcement record and, if accepted, fetch its PDF text.

    NOTE: this function shadows the builtin ``filter`` inside this module; the
    name is kept because callers depend on it.

    Args:
        input_json: Mapping with at least the keys ``FileURL``, ``symbol``,
            ``TypeofAnnouncement`` and ``Descriptor``.

    Returns:
        A two-element list. ``[0, reason]`` when the record is rejected, where
        ``reason`` is one of ``"File_URL"``, ``"symbol"``, ``"Annoucement"``
        or ``"Desc"``; otherwise ``[1, document]`` where ``document`` is the
        concatenated text of every page of the downloaded PDF.
    """
    file_url = input_json['FileURL']
    if file_url is None or file_url.lower() == 'null':
        return [0, "File_URL"]

    # Only read the symbol sheet once the record has a usable URL, so records
    # rejected above cost no file I/O.
    sym = pd.read_excel('symbol.xlsx', header=None)
    sym_list = sym.iloc[:, 0].to_list()
    if input_json['symbol'] == 'null' or input_json['symbol'] not in sym_list:
        return [0, "symbol"]

    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        return [0, "Annoucement"]

    if input_json['Descriptor'] not in desc_list:
        return [0, "Desc"]

    # The attachment name is whatever follows the last 'Pname=' in FileURL.
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + file_url.split('Pname=')[-1]
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(req) as response:
        cont = response.read()

    reader = PdfReader(io.BytesIO(cont))
    # ``or ''`` guards against a page yielding no text object.
    document = ''.join(page.extract_text() or '' for page in reader.pages)
    return [1, document]
def _strip_preamble(answer):
    """Return ``answer`` without its first line and the character after it.

    Everything up to and including the first newline, plus the single
    character that follows it, is dropped. An answer with no newline is
    returned unchanged.
    """
    newline_at = answer.find("\n")
    if newline_at == -1:
        return answer
    return answer[newline_at + 2:]


def summary(input_json):
    """Build the summary record for one announcement.

    The record is first passed through ``filter``. Rejected records return the
    rejection reason; accepted ones are summarised with several LLM calls.

    Args:
        input_json: Mapping with the keys required by ``filter`` plus
            ``newsdate``.

    Returns:
        The rejection reason (a string) when ``filter`` rejects the record.
        Otherwise a dict with the keys "Link to BSE website",
        "Date of time of receiving data from BSE", "Stock Ticker",
        "Short Summary", "Tag", "Headline",
        "Date and time of data delivery from Skylark", "Sentiment" and
        "Long summary".
    """
    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return filtering_results[1]

    # Prompts are only needed for accepted records, so load them after the
    # filter check.
    prompt_table = pd.read_excel('DescriptorPrompt.xlsx')
    prompt_short = prompt_table.iloc[:, 1].to_list()
    prompt_long = prompt_table.iloc[:, 2].to_list()

    # Position of the descriptor in desc_list selects the prompt row.
    desc_idx = desc_list.index(input_json['Descriptor'])

    # Trim surrounding whitespace, then cap the text sent to the models.
    long_text = filtering_results[1].strip()[:6000]

    output = {}
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + input_json['FileURL'].split('Pname=')[-1]
    output["Link to BSE website"] = url
    output["Date of time of receiving data from BSE"] = input_json["newsdate"] + "Z"
    output["Stock Ticker"] = input_json['symbol']

    answer = callAzure("You are an financial expert. Wherever possible, mention the name of the company " + prompt_short[desc_idx] + " Do not exceed over 400 characters", long_text)
    output['Short Summary'] = _strip_preamble(answer)

    # Second pass: have the model shorten its own summary.
    answer = callAzure("Make sure the following summary of a news article is not more than 80 words. Rewrite it and make it below 80 words ", output['Short Summary'])
    output['Short Summary'] = _strip_preamble(answer)

    tag_prompt = "Answer in 1 word only. Financial SEO tag for this news article. Nothing more than that"
    output['Tag'] = callAzure(tag_prompt, output['Short Summary'])

    headline_prompt = "Answer in single sentence. A headline for this News Article. Nothing more than that"
    output['Headline'] = callAzure(headline_prompt, output['Short Summary'])

    # Start from an aware UTC "now": calling astimezone() on a naive
    # utcnow() value treats it as system-local time and yields a wrong IST
    # stamp on any host whose timezone is not UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(datetime.timezone.utc).astimezone(ist)
    date_str = ist_now.strftime("%Y-%m-%d")
    # Explicit format instead of the locale-dependent "%X".
    time_str = ist_now.strftime("%H:%M:%S")
    # NOTE(review): the value is IST wall time but carries a "Z" (UTC) suffix;
    # the suffix is kept because consumers may depend on the format - confirm.
    output['Date and time of data delivery from Skylark'] = date_str + "T" + time_str + "Z"

    # callAzure appends the summary itself, so the prompt carries no "{}"
    # placeholder.
    sentiment_prompt = "Answer in one word the sentiment of this News out of Positive, Negative or Neutral"
    output['Sentiment'] = callAzure(sentiment_prompt, output['Short Summary'])

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a financial expert. Help the client with summarizing the financial newsletter. Write the summary in max 500 words. Do not truncate."},
            {"role": "user", "content": "{} {}".format(prompt_long[desc_idx], long_text)},
        ],
        temperature=0,
        max_tokens=4000,
    )
    output['Long summary'] = completion.choices[0].message.content

    # Infographic generation (an image-model call) is currently disabled.
    return output