Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*- | |
import streamlit as st | |
import pandas as pd | |
import base64 | |
from pyxlsb import open_workbook as open_xlsb | |
from io import BytesIO | |
from datetime import datetime | |
from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel | |
from funcs import encuentra_hoja | |
import bs4 as BeautifulSoup | |
import urllib.request | |
from urllib.request import urlopen, Request | |
import re | |
# Page header: two equal-width columns with the logo rendered in the second
# one, followed by the application title.
c1, c2 = st.columns([6, 6])
c2.image('encopartslogo.jpg', width=300, caption='https://encoparts.com/')

# Base search URLs of the parts sites. Not referenced in the visible part of
# this script -- presumably kept for the scraping helpers; verify before removing.
rutas_websearch = [
    'https://en.hespareparts.com/search/?search=',
    'https://offroadeq.com/parts-search/',
]

st.title('Data Extraction')
# --- Part-number input ------------------------------------------------------
# The user chooses how to supply part numbers. When usable input has been
# provided, `items` holds the part numbers as strings (a list for pasted text,
# a pandas Series for an Excel upload); otherwise it stays None so the
# processing step below is skipped.
st.subheader('Part Number Loading A')
selec = st.radio('Select: ', [None, 'Comma Separated Text', 'Excel File Loading'])
items = None
if selec is None:
    st.write('Please select data loading method.')
elif selec == 'Comma Separated Text':
    st.write(selec)
    codigos = st.text_input('Paste or write here the text, separated by commas: ')
    if st.button('Proceed'):
        # Trim whitespace around each code and drop empty entries so that
        # input such as "A, B," does not produce " B" or "" as part numbers.
        codigos_limpios = [parte.strip() for parte in codigos.split(',')]
        codigos_limpios = [parte for parte in codigos_limpios if parte]
        if codigos_limpios:
            items = codigos_limpios
        else:
            st.warning('No part numbers were found in the text.')
else:
    st.write(selec)
    file = st.file_uploader('Select an Excel File: ')
    if file is not None:
        hojax = encuentra_hoja(file)
        codigosf = pd.read_excel(file, sheet_name=hojax)
        st.write('Rows, Columns: ', codigosf.shape)
        if codigosf.shape[1] == 0:
            # Nothing to read: indexing columns[0] would raise IndexError.
            st.warning('The selected sheet has no columns.')
        else:
            # Part numbers are taken from the first column of the sheet.
            namcol = codigosf.columns[0]
            # Drop blank cells before casting; otherwise they become the
            # literal string 'nan' and are looked up as a part number.
            codigos_col = codigosf[namcol].dropna().astype(str).str.strip()
            codigos_col = codigos_col[codigos_col != '']
            if codigos_col.empty:
                st.warning('No part numbers were found in the first column.')
            else:
                items = codigos_col
# --- Extraction and download ------------------------------------------------
# For every loaded part number, fetch its data with extrae_dato_web, build a
# result table and offer it as XLSX and CSV downloads.
if selec is not None and items is not None:
    st.write(items)
    datos_tot = []
    st.write('Please wait while data is being processed ...')
    for it in items:
        extrae_dat = extrae_dato_web(it)
        # Hyphenated form of the part number: a dash before the last four
        # characters. NOTE(review): codes shorter than five characters yield
        # a leading dash (e.g. '-123') -- confirm whether that can occur.
        itxx = it[:-4] + '-' + it[-4:]
        datos_tot.append([it, itxx] + list(extrae_dat))

    # Passing the column names to the constructor (instead of assigning
    # dtdf.columns afterwards) keeps an empty result from raising a
    # length-mismatch ValueError.
    # NOTE: the dimension/compatibility fields that came from extrae_web
    # (descrip_en, length_m, width_m, height_m, vol_m3, compatible) are
    # currently disabled.
    dtdf = pd.DataFrame(
        datos_tot,
        columns=['part_no_', 'part_no',
                 'alternate', 'peso_lb', 'precio_bm_us', 'descr'],
    )

    # Timestamp used in the download file names: MM_DD_YYYY_HH_MM_SS.
    now = datetime.now()
    date_time = now.strftime('%m_%d_%Y_%H_%M_%S')

    # Weight converted from pounds to kilograms.
    dtdf['peso_kg'] = dtdf['peso_lb'] * 0.453592

    # Column selection and order for the exported files.
    dtdf2 = dtdf[['part_no_', 'part_no', 'descr',
                  'peso_kg', 'precio_bm_us', 'alternate']]

    df_xlsx = convierte_excel(dtdf2)
    st.download_button(label='📩 Download XLSX', data=df_xlsx,
                       file_name='df_test' + date_time + '.xlsx')
    csv = dtdf2.to_csv(index=False)
    st.download_button(label='📩 Download CSV', data=csv,
                       file_name='extraccion_' + date_time + '.csv')
else:
    st.write('Please select loading option, load and proceed.')
# Page footer: three equal-width columns with the credit logo rendered in the
# last one.
c1, c2, c3 = st.columns([4, 4, 4])
c3.image('gdmklogo.png', width=100, caption='Updated by GestioDinámica in 2023')