File size: 5,282 Bytes
284c80a
 
 
 
 
 
 
 
 
 
 
 
 
382488b
284c80a
 
720c8eb
 
284c80a
 
 
 
 
720c8eb
 
284c80a
 
 
 
 
 
 
 
 
 
 
720c8eb
284c80a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ebb24d
 
284c80a
 
 
 
 
 
 
 
720c8eb
 
f520f6e
 
720c8eb
284c80a
 
 
 
 
 
 
 
bc29c51
c4ee0af
 
 
 
 
 
b6f4209
e279307
 
 
 
 
284c80a
39cf970
284c80a
 
 
39cf970
284c80a
 
 
 
 
 
 
 
 
 
 
 
 
720c8eb
c102673
720c8eb
71df0ea
470d207
c8ad4b9
 
71df0ea
720c8eb
 
3d86e91
 
284c80a
 
 
 
 
 
 
 
 
95bc4d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import datetime
from urllib.request import Request, urlopen
from pypdf import PdfReader
from io import StringIO
import io
import pandas as pd
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from openai import OpenAI
from groq import Groq
import time 
import json
from openai import OpenAI

# The OpenAI credential is taken from the environment instead of being
# hard-coded, so the secret never lives in the source tree. Any key that was
# previously committed here should be treated as leaked and revoked.
# If OPENAI_API_KEY is unset, ``openai_key`` is None and the OpenAI client
# applies its own lookup/validation (see the openai-python documentation).
openai_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=openai_key)

# First column of Descriptor.xlsx (read without a header row): the list of
# descriptors accepted by filter() and used by summary() to pick a prompt.
desc = pd.read_excel('Descriptor.xlsx', header=None)
desc_list = desc.iloc[:, 0].to_list()

def callAzure(prompt, text):
    """Send ``prompt`` and ``text`` as one user message to the Azure Llama endpoint.

    The two strings are joined with a single space and sent as the only
    message of a chat-completion request (model ``"azureai"``, at most 1000
    completion tokens).

    Args:
        prompt: Instruction placed at the start of the message.
        text: Content the instruction applies to.

    Returns:
        The message content of the first choice in the response.

    Raises:
        RuntimeError: If the ``AZURE_LLAMA_API_KEY`` environment variable is
            not set. The key is deliberately not stored in source code.
    """
    api_key = os.environ.get("AZURE_LLAMA_API_KEY")
    if not api_key:
        raise RuntimeError(
            "AZURE_LLAMA_API_KEY environment variable is not set; "
            "cannot call the Azure inference endpoint."
        )

    url = "https://Meta-Llama-3-70B-Instruct-fkqip-serverless.eastus2.inference.ai.azure.com"
    # Named distinctly so it does not shadow the module-level OpenAI ``client``.
    azure_client = OpenAI(base_url=url, api_key=api_key)
    msg = "{} {}".format(prompt, text)

    response = azure_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": msg,
            }
        ],
        model="azureai",
        max_tokens=1000,
    )

    return response.choices[0].message.content
    
def filter(input_json):
    """Validate an announcement record and fetch the text of its PDF attachment.

    The record is rejected when its ``FileURL`` is missing/"null", its
    ``symbol`` is not listed in the first column of ``symbol.xlsx``, its
    ``TypeofAnnouncement`` is not one of the accepted types, or its
    ``Descriptor`` is not in the module-level ``desc_list``.

    Args:
        input_json: Mapping with the keys ``FileURL``, ``symbol``,
            ``TypeofAnnouncement`` and ``Descriptor``.

    Returns:
        ``[0, reason]`` when the record is rejected, where ``reason`` is one
        of ``"File_URL"``, ``"symbol"``, ``"Annoucement"`` or ``"Desc"``;
        otherwise ``[1, document]`` where ``document`` is the concatenated
        text extracted from every page of the downloaded PDF.
    """
    file_url = input_json['FileURL']
    # Cheapest check first: no spreadsheet or network access is needed to
    # reject a record that has no attachment.
    if file_url is None or file_url.lower() == 'null':
        return [0, "File_URL"]

    sym = pd.read_excel('symbol.xlsx', header=None)
    sym_list = sym.iloc[:, 0].to_list()

    if input_json['symbol'] == 'null' or input_json['symbol'] not in sym_list:
        return [0, "symbol"]
    if input_json['TypeofAnnouncement'] not in ['General_Announcements', 'Outcome', 'General']:
        return [0, "Annoucement"]
    if input_json['Descriptor'] not in desc_list:
        return [0, "Desc"]

    # Only the part after 'Pname=' (the attachment file name) is kept and
    # appended to the BSE attachment base URL.
    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + file_url.split('Pname=')[-1]
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(req) as response:
        pdf_bytes = response.read()

    reader = PdfReader(io.BytesIO(pdf_bytes))
    # ``or ''`` guards against a page for which no text is returned.
    document = ''.join(page.extract_text() or '' for page in reader.pages)

    return [1, document]

def _strip_preamble(answer):
    """Drop the first line of a multi-line model answer.

    Single-line answers are returned (whitespace-stripped) unchanged. For
    multi-line answers the first line is treated as an introduction and
    everything after it, minus leading blank space, is returned.
    """
    cleaned = answer.strip()
    _, newline, tail = cleaned.partition("\n")
    return tail.lstrip() if newline else cleaned


def summary(input_json):
    """Build the summary record for one BSE announcement.

    The record is first validated with ``filter``. If it is accepted, the
    first 6000 characters of the attachment text are summarised through
    ``callAzure`` (short summary, tag, headline, sentiment) and through the
    module-level OpenAI ``client`` (long summary).

    Args:
        input_json: Mapping with the keys read by ``filter`` plus
            ``newsdate``.

    Returns:
        The rejection reason string from ``filter`` when the record is
        rejected; otherwise a dict with the keys ``"Link to BSE website"``,
        ``"Date of time of receiving data from BSE"``, ``"Stock Ticker"``,
        ``"Short Summary"``, ``"Tag"``, ``"Headline"``,
        ``"Date and time of data delivery from Skylark"``, ``"Sentiment"``
        and ``"Long summary"``.
    """
    filtering_results = filter(input_json)
    if filtering_results[0] == 0:
        return filtering_results[1]

    # The prompt workbook is only needed for accepted records, so it is read
    # after filtering.
    prompt_table = pd.read_excel('DescriptorPrompt.xlsx')
    promptShort = prompt_table.iloc[:, 1].to_list()
    promptLong = prompt_table.iloc[:, 2].to_list()

    # NOTE(review): this assumes row i of DescriptorPrompt.xlsx (read with a
    # header row) lines up with entry i of desc_list (read without one) -
    # confirm against the spreadsheets.
    desc_idx = desc_list.index(input_json['Descriptor'])
    long_text = filtering_results[1].strip()[:6000]

    url = 'https://www.bseindia.com/xml-data/corpfiling/AttachLive/' + input_json['FileURL'].split('Pname=')[-1]

    output = {}
    output["Link to BSE website"] = url
    output["Date of time of receiving data from BSE"] = input_json["newsdate"] + "Z"
    output["Stock Ticker"] = input_json['symbol']

    answer = callAzure("You are an financial expert. Wherever possible, mention the name of the company " + promptShort[desc_idx] + " Do not exceed over 400 characters", long_text)
    output['Short Summary'] = _strip_preamble(answer)

    # Second pass: ask the model to shorten the summary it just produced.
    answer = callAzure("Make sure the following summary of a news article is not more than 80 words. Rewrite it and make it below 80 words ", output['Short Summary'])
    output['Short Summary'] = _strip_preamble(answer)

    tag_prompt = "Provide the main topic of the news article strictly as a tag, using only one or two words, with only the first word capitalized and the rest in lowercase. No additional text or explanation."
    output['Tag'] = callAzure(tag_prompt, output['Short Summary'])

    headline_prompt = "Generate a precise headline for the news article that includes the name of the company. Be very careful about correctly representing any financial figures mentioned in lakhs and crores. Provide only the headline, with no additional text or explanation."
    output['Headline'] = callAzure(headline_prompt, output['Short Summary'])

    # Use an aware "now" directly in IST (UTC+05:30). Converting a naive
    # utcnow() with astimezone() would interpret it as local time and give a
    # wrong result on hosts whose local zone is not UTC.
    ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    ist_now = datetime.datetime.now(ist)
    # NOTE(review): the value is IST wall-clock time but carries a "Z" suffix,
    # which conventionally denotes UTC. The format is kept unchanged for
    # existing consumers - confirm what downstream expects.
    output['Date and time of data delivery from Skylark'] = ist_now.strftime("%Y-%m-%dT%H:%M:%S") + "Z"

    # The prompt previously ended with a stray, never-filled "{}" placeholder
    # that was sent to the model verbatim.
    sentiment_prompt = "Answer in one word the sentiment of this News out of Positive, Negative or Neutral"
    output['Sentiment'] = callAzure(sentiment_prompt, output['Short Summary'])

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a financial expert. Help the client with summarizing the financial newsletter. Write the summary in max 500 words. Do not truncate."},
            {"role": "user", "content": "{} {}".format(promptLong[desc_idx], long_text)},
        ],
        temperature=0,
        max_tokens=4000,
    )
    output['Long summary'] = completion.choices[0].message.content

    # Infographic generation (DALL-E image from the headline) is currently
    # disabled; no "Link to Infographic" key is produced.

    return output