# -*- coding: utf-8 -*-
# Streamlit page: load part numbers (typed as comma-separated text or read
# from the first column of an Excel sheet), look each one up through
# funcs.extrae_dato_web, and offer the combined table as XLSX and CSV
# downloads.

import base64
import re
import urllib.request
from datetime import datetime
from io import BytesIO
from urllib.request import urlopen, Request

import bs4 as BeautifulSoup
import pandas as pd
import streamlit as st
from pyxlsb import open_workbook as open_xlsb

from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel
from funcs import encuentra_hoja

# Multiplier applied to the 'peso_lb' column to obtain 'peso_kg'.
LB_TO_KG = 0.453592

# Labels for one result row: the raw code, the hyphenated code, then the
# values yielded by extrae_dato_web (assumed to be exactly four, in this
# order -- TODO confirm against funcs.extrae_dato_web).
# NOTE: the dimension lookup (extrae_web) and its columns ('descrip_en',
# 'length_m', 'width_m', 'height_m', 'vol_m3', 'compatible') are disabled.
RAW_COLUMNS = ['part_no_', 'part_no', 'alternate', 'peso_lb', 'precio_bm_us',
               'descr']

# Columns, in order, written to the downloadable files.
EXPORT_COLUMNS = ['part_no_', 'part_no', 'descr', 'peso_kg', 'precio_bm_us',
                  'alternate']


# Split comma-separated text into part numbers, trimming the whitespace
# around each one and dropping empty fragments (so "A, B," -> ['A', 'B']).
def _parse_codes(text):
    return [code.strip() for code in text.split(',') if code.strip()]


# Insert a hyphen before the last four characters of a part number
# (e.g. '1234567' -> '123-4567').
def _hyphenate(code):
    return code[:-4] + '-' + code[-4:]


# --- Header logo (right-hand column) ---------------------------------------
c1, c2 = st.columns([6, 6])
with c2:
    st.image('encopartslogo.jpg', width=300, caption='https://encoparts.com/')

# Search endpoints; not referenced anywhere else in this script.
rutas_websearch = ['https://en.hespareparts.com/search/?search=',
                   'https://offroadeq.com/parts-search/']

# --- Input selection --------------------------------------------------------
st.title('Data Extraction')
st.subheader('Part Number Loading A')
selec = st.radio('Select: ',
                 [None, 'Comma Separated Text', 'Excel File Loading'])

# Stays None until the user has supplied at least one part number.
items = None
if selec is None:
    st.write('Please select data loading method.')
elif selec == 'Comma Separated Text':
    st.write(selec)
    codigos = st.text_input(
        'Paste or write here the text, separated by commas: ')
    if st.button('Proceed'):
        # An empty list is mapped back to None so that a blank text box is
        # treated as "nothing loaded" instead of producing an empty report.
        items = _parse_codes(codigos) or None
else:
    st.write(selec)
    file = st.file_uploader('Select an Excel File: ')
    if file is not None:
        # encuentra_hoja picks the sheet to read; its result is passed
        # straight to read_excel as sheet_name.
        hojax = encuentra_hoja(file)
        codigosf = pd.read_excel(file, sheet_name=hojax)
        st.write('Rows, Columns: ', codigosf.shape)
        # Part numbers are taken from the first column, as strings.
        namcol = codigosf.columns[0]
        items = pd.Series(codigosf[namcol]).astype(str)

# --- Lookup and export ------------------------------------------------------
if items is not None:
    st.write(items)
    st.write('Please wait while data is being processed ...')

    # One lookup per part number; each row is
    # [raw code, hyphenated code, *values from extrae_dato_web].
    datos_tot = [[it, _hyphenate(it)] + list(extrae_dato_web(it))
                 for it in items]

    # Passing the labels to the constructor (instead of assigning
    # dtdf.columns afterwards) keeps an empty result set from raising a
    # length-mismatch error.
    dtdf = pd.DataFrame(datos_tot, columns=RAW_COLUMNS)
    dtdf['peso_kg'] = dtdf.peso_lb * LB_TO_KG
    dtdf2 = dtdf[EXPORT_COLUMNS]

    # Timestamp suffix for the download file names: MM_DD_YYYY_HH_MM_SS.
    date_time = datetime.now().strftime('%m_%d_%Y_%H_%M_%S')

    df_xlsx = convierte_excel(dtdf2)
    st.download_button(label='📩 Download XLSX', data=df_xlsx,
                       file_name='df_test' + date_time + '.xlsx')
    csv_text = dtdf2.to_csv(index=False)
    st.download_button(label='📩 Download CSV', data=csv_text,
                       file_name='extraccion_' + date_time + '.csv')
else:
    st.write('Please select loading option, load and proceed.')

# --- Footer logo (right-most of three columns) ------------------------------
c1, c2, c3 = st.columns([4, 4, 4])
with c3:
    st.image('gdmklogo.png', width=100,
             caption='Updated by GestioDinámica in 2023')