# Spaces: Running  (appears to be page-header text captured along with the source; not part of the program)
import requests | |
from langchain.output_parsers import ResponseSchema, StructuredOutputParser | |
from langchain.prompts import PromptTemplate | |
from langchain_community.document_loaders import WebBaseLoader | |
from langchain.prompts import ChatPromptTemplate | |
from langchain_core.output_parsers import StrOutputParser | |
from scrap_post import scrappost | |
def google_search(url, model, google_api_key, search_engine_id, num_results_per_query=(3, 2, 1)):
    """Generate a new LinkedIn post from web pages related to an existing post.

    Pipeline:
      1. Fetch the post with ``scrappost(url)``.
      2. Ask ``model`` for the top relevant questions raised by the post.
      3. Run each question through the Google Custom Search JSON API and
         collect the result links.
      4. Load every linked page and ask ``model`` to extract the parts that
         relate to the post.
      5. Ask ``model`` to write a new LinkedIn post from the extracted text.

    Args:
        url: Location of the LinkedIn post; passed unchanged to ``scrappost``.
        model: LangChain runnable used as the LLM step of every chain
            (it is composed with prompts and parsers via ``|``).
        google_api_key: API key sent as the ``key`` search parameter.
        search_engine_id: Programmable Search Engine id sent as ``cx``.
        num_results_per_query: Number of results to request for each
            question, paired with the questions in order. Questions beyond
            the length of this sequence are not searched (``zip`` stops at
            the shorter input).

    Returns:
        The output of the final generation chain (parsed by
        ``StrOutputParser``).

    Raises:
        requests.RequestException: If a search request fails at the
            transport level or exceeds the timeout.
    """
    search_endpoint = "https://www.googleapis.com/customsearch/v1"
    search_timeout_seconds = 30

    post = scrappost(url)

    # --- Step 1: extract questions from the post as structured output. ---
    response_schemas = [
        ResponseSchema(
            name="questions",
            description="These are the top three relevant questions from the LinkedIn post",
            type="list",
        )
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()

    question_template = """
You are a helpful question extractor bot. You are provided with LinkedIn post and your task is to extract the top three relevant questions from the post which are related to the topics of the post only.:
LinkedIn post: {post}
{format_instructions}
"""
    question_prompt = PromptTemplate(
        template=question_template,
        input_variables=["post"],
        partial_variables={"format_instructions": format_instructions},
    )
    question_chain = question_prompt | model | output_parser
    questions = question_chain.invoke({"post": post})['questions']

    # --- Step 2: search for each question and gather result links. ---
    all_links = []
    for query, num_results in zip(questions, num_results_per_query):
        # Pass the query through ``params`` so requests URL-encodes it; the
        # questions are free text from the model and routinely contain
        # spaces, '?', '&' or '#', which would corrupt a hand-built URL.
        search_params = {
            "key": google_api_key,
            "cx": search_engine_id,
            "q": query,
            "tbm": "nws",
            "num": num_results,
        }
        # No Cookie header: the API authenticates with ``key``, so a
        # hard-coded browser cookie is neither needed nor safe to embed.
        response = requests.get(
            search_endpoint,
            params=search_params,
            timeout=search_timeout_seconds,
        )
        search_results = response.json()
        # A response without "items" (no hits, or an API error payload)
        # contributes no links rather than aborting the whole run.
        all_links.extend(item['link'] for item in search_results.get('items', []))

    # --- Step 3: download the linked pages (throttled to 1 request/second). ---
    loader = WebBaseLoader(all_links, encoding="utf-8")
    loader.requests_per_second = 1
    docs = loader.load()

    # --- Step 4: keep only the parts of each page that relate to the post. ---
    template1 = """Extract pertinent information from the provided document that aligns with the content of the LinkedIn post. Focus solely on the document to identify and highlight relevant details that mirror the themes or topics discussed in the post. Avoid incorporating any content from the LinkedIn post itself, ensuring that the extracted information complements and enhances the post's message.
Linkedin post:{post}
Document: {content}"""
    extract_chain = ChatPromptTemplate.from_template(template1) | model | StrOutputParser()
    relevant_content = "".join(
        extract_chain.invoke({'post': post, 'content': doc.page_content})
        for doc in docs
    )

    # --- Step 5: write the new post from the extracted material. ---
    template2 = """Utilizing the content from the provided document, craft a new LinkedIn post focusing on a carefully chosen topic. Ensure a professional format incorporating headings, key points, stickers, and emojis to enhance engagement. The post's length should not surpass 3000 characters, and all content must be derived solely from the document.
Strive to select a topic that resonates with the document's information and presents it in a compelling and informative manner.
Document: {content}"""
    generate_chain = ChatPromptTemplate.from_template(template2) | model | StrOutputParser()
    return generate_chain.invoke({'content': relevant_content})