Spaces:

vhr1007
/

traversaal_api_test

Sleeping

File size: 13,232 Bytes

8959c46

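# NOTE: All of the code visible below is commented out, so as written this
# script defines nothing and performs no work when run. The lines must be
# uncommented before the Streamlit app can be used.
# import streamlit as st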
# from carbon import Carbon
# import requests
# import json

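# # Carbon API Key
# # NOTE(review): the credential below is hard-coded in source. If this code is
# # re-enabled, load it from an environment variable or a secrets store instead,
# # and treat the committed value as exposed (rotate it).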
# CARBON_API_KEY = "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea"
# CUSTOMER_ID = "Candid"

# def get_google_drive_oauth(carbon):
#     get_oauth_url_response = carbon.integrations.get_oauth_url(
#         service="GOOGLE_DRIVE",
#         scope="https://www.googleapis.com/auth/drive.readonly",
#         connecting_new_account=True,
#     )
#     return get_oauth_url_response.oauth_url

# def get_dropbox_oauth(carbon):
#     get_oauth_url_response = carbon.integrations.get_oauth_url(
#         service="DROPBOX",
#         connecting_new_account=True,
#     )
#     return get_oauth_url_response.oauth_url

# def get_notion_oauth(carbon):
#     get_oauth_url_response = carbon.integrations.get_oauth_url(
#         service="NOTION",
#         connecting_new_account=True,
#     )
#     return get_oauth_url_response.oauth_url

# def sync_github(carbon, username, token):
#     sync_response = carbon.integrations.sync_git_hub(
#         username=username,
#         token=token,
#         sync_source_items=True
#     )
#     return sync_response

# def sync_gitbook(carbon, access_token, organization):
#     sync_response = carbon.integrations.sync_git_book(
#         access_token=access_token,
#         organization=organization,
#         sync_source_items=True
#     )
#     return sync_response

# def sync_s3(carbon, access_key, access_key_secret):
#     sync_response = carbon.integrations.sync_s3(
#         access_key=access_key,
#         access_key_secret=access_key_secret,
#         sync_source_items=True
#     )
#     return sync_response

# def sync_google_drive(carbon, data_source_id):
#     sync_response = carbon.integrations.sync_data_source_items(data_source_id=int(data_source_id))
#     return sync_response


# def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
#     if data_source_id:
#         sync_google_drive(carbon, data_source_id)
#         list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
#         st.session_state['current_data_source'] = data_source_id  # Store the current data source
#         st.session_state['files'] = list_files_response.items  # Store the fetched files
#     else:
#         if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
#             data_source_id = st.session_state['current_data_source']
#             sync_google_drive(carbon, data_source_id)
#             list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
#             st.session_state['files'] = list_files_response.items  # Store the fetched files
#         else:
#             query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
#                 pagination={"limit": 100, "offset": 0},
#                 order_by="created_at",
#                 order_dir="desc",
#                 filters={"source": service},
#             )
#             if query_user_data_sources_response.results:
#                 data_source_id = query_user_data_sources_response.results[0].id
#                 sync_google_drive(carbon, data_source_id)
#                 list_files_response = carbon.integrations.list_data_source_items(data_source_id=int(data_source_id))
#                 st.session_state['current_data_source'] = data_source_id
#                 st.session_state['files'] = list_files_response.items  # Store the fetched files
#             else:
#                 list_files_response = None
#     return list_files_response.items if list_files_response else None

# def list_all_files(carbon, data_source_id):
#     url = "https://api.carbon.ai/user_files_v2"
#     payload = {
#         "pagination": {
#             "limit": 100,
#             "offset": 0
#         },
#         "order_by": "created_at",
#         "order_dir": "desc",
#         "filters": {
#             "organization_user_data_source_id": [data_source_id],
#             "embedding_generators": ["OPENAI"],
#             "include_all_children": True,
#         },
#         "include_raw_file": True,
#         "include_parsed_text_file": True,
#         "include_additional_files": True
#     }
#     headers = {
#         "authorization": f"Bearer {CARBON_API_KEY}",
#         "customer-id": CUSTOMER_ID,
#         "Content-Type": "application/json"
#     }

#     response = requests.request("POST", url, json=payload, headers=headers)
#     res = json.loads(response.text)
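#     # NOTE(review): indexing res['results'][0] is unguarded and will raise if
#     # the response has no 'results' key or an empty list. `file_id` is also
#     # reassigned after the loop and only printed, never returned.
#     file_id= res['results'][0]['id']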
#     for i,document in enumerate(res['results']):
#         print(document['name'])
#         print(document['id'])
        
#     file_id=res['results'][0]['id']
#     print(file_id)
#     return res['results']


# def list_user_documents(carbon):
#     query_user_documents_response = carbon.documents.query_documents(
#         pagination={"limit": 100, "offset": 0},
#         order_by="created_at",
#         order_dir="desc"
#     )
#     return query_user_documents_response.documents if query_user_documents_response else None

# def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
#     search_response = carbon.embeddings.get_documents(
#         query=query,
#         k=2,
#         tags_v2=tags_v2 if tags_v2 else {},
#         include_tags=True,
#         include_vectors=True,
#         include_raw_file=True,
#         hybrid_search=hybrid_search,
#         hybrid_search_tuning_parameters={
#             "weight_a": 0.5,
#             "weight_b": 0.5,
#         },
#         media_type="TEXT",
#         embedding_model="OPENAI",
#     )
#     return search_response.documents


# def main():
#     st.title('Data Connector using Carbon SDK')
    
#     # Authenticate with Carbon API
#     st.write('### Authenticate with Carbon API')
#     carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
#     token = carbon.auth.get_access_token()
#     carbon = Carbon(access_token=token.access_token)  # authenticated object

#     # Connect to Data Source
#     st.write('## Connect to Data Source')
#     service = st.selectbox('Select Data Source for OAuth', ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION'])
#     if st.button('Get OAuth URL'):
#         with st.spinner('Fetching OAuth URL...'):
#             try:
#                 if service == "GOOGLE_DRIVE":
#                     oauth_url = get_google_drive_oauth(carbon)
#                 elif service == "DROPBOX":
#                     oauth_url = get_dropbox_oauth(carbon)
#                 elif service == "NOTION":
#                     oauth_url = get_notion_oauth(carbon)
#                 st.write(f"OAuth URL for {service}: {oauth_url}")
#                 st.session_state['current_data_source'] = None  # Reset the current data source
#                 st.session_state['files'] = None  # Clear the previous files
#                 st.session_state['oauth_fetched'] = True
#             except Exception as e:
#                 st.error(f"An error occurred: {e}")

#     if 'oauth_fetched' in st.session_state and st.session_state['oauth_fetched']:
#         st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.")
#         if st.button('Sync and Fetch Files'):
#             with st.spinner('Syncing and fetching files...'):
#                 try:
#                     query_user_data_sources_response = carbon.data_sources.query_user_data_sources(
#                         pagination={"limit": 100, "offset": 0},
#                         order_by="created_at",
#                         order_dir="desc",
#                         filters={"source": service},
#                     )
#                     if query_user_data_sources_response.results:
#                         data_source_id = query_user_data_sources_response.results[0].id
#                         sync_google_drive(carbon, data_source_id)
#                         st.session_state['current_data_source'] = data_source_id
#                         st.session_state['oauth_fetched'] = False
#                         st.success("Synced successfully! Now you can list the files.")
#                     else:
#                         st.error("No data sources found. Please ensure the connection was successful.")
#                 except Exception as e:
#                     st.error(f"An error occurred: {e}")

#     # List Files in Data Source
#     st.write(f'## List Files in {service}')
#     data_source_id = st.text_input('Enter Data Source ID (leave blank to list all files)')
#     if st.button('List Files'):
#         with st.spinner('Fetching files...'):
#             try:
#                 if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
#                     data_source_id = st.session_state['current_data_source']
#                 files = list_files(carbon, data_source_id if data_source_id else None, service)
#                 if files:
#                     st.write(f"Files in {service}:")
#                     for item in files:
#                         st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
#                 else:
#                     st.write("No files found.")
#             except Exception as e:
#                 st.error(f"An error occurred: {e}")

#     # List All Files
#     st.write('### List All Files')
#     if st.button('List All Files'):
#         with st.spinner('Fetching all files...'):
#             try:
#                 if 'current_data_source' in st.session_state and st.session_state['current_data_source']:
#                     data_source_id = st.session_state['current_data_source']
#                     all_files = list_all_files(carbon, data_source_id)
#                     if all_files:
#                         st.write("All files:")
#                         for i, document in enumerate(all_files):
#                             file_id = document['id']
#                             st.write(f"File ID: {document['id']}, File Name: {document['name']}")
#                     else:
#                         st.write("No files found.")
#             except Exception as e:
#                 st.error(f"An error occurred: {e}")

#     # Search in the Connected Data Source
#     st.write('### Search in the Connected Data Source')
#     query = st.text_input("Enter your query:", value="Type here...")
#     if st.button('Search'):
#         if query:
#             with st.spinner('Searching...'):
#                 try:
#                     all_files = list_all_files(carbon, data_source_id)
#                     url = "https://api.carbon.ai/embeddings"
#                     payload = {
#                         "query": query,
#                         "k": 2,
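#                         # NOTE(review): within main(), `file_id` is only assigned inside the
#                         # 'List All Files' branch, so it may be undefined when this payload is
#                         # built; the `all_files` value fetched just above is also unused here.
#                         # Confirm which file IDs this request is meant to send.
#                         "file_ids": file_id,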
#                         "include_all_children": True,
#                         "tags": {},
#                         "include_tags": True,
#                         "include_vectors": True,
#                         "include_raw_file": True,
#                         "hybrid_search": False,
#                         "media_type": "TEXT",
#                         "embedding_model": "OPENAI"
#                     }
#                     headers = {
#                         "authorization": f"Bearer {CARBON_API_KEY}",
#                         "customer-id": CUSTOMER_ID,
#                         "Content-Type": "application/json"
#                     }
#                     response_search = requests.post(url, json=payload, headers=headers)
#                     response_search_chunks = json.loads(response_search.text)
                    
#                     st.write("Search results:")
#                     for i, doc in enumerate(response_search_chunks['documents']):
#                         st.write(f"Document {i+1}:")
#                         st.write(f"Content: {doc['content']}")
#                         st.write(f"Source: {doc['source']}")
#                         st.write(f"Match Percentage: {doc['score'] * 100}%")
#                         if 'file_url' in doc:
#                             st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
#                         st.write("-------------------------------------------------")
#                 except Exception as e:
#                     st.error(f"An error occurred: {e}")
#         else:
#             st.write("Please enter a query to search.")
    
#     # Display Search History
#     st.write('## Search History')
#     if 'search_history' not in st.session_state:
#         st.session_state['search_history'] = []
    
#     if query and st.button('Add to Search History'):
#         st.session_state['search_history'].append(query)
    
#     if st.session_state['search_history']:
#         st.write("Past Searches:")
#         for past_query in st.session_state['search_history']:
#             st.write(past_query)

# # Call the main function
# if __name__ == '__main__':
#     main()