Spaces:
Sleeping
Sleeping
jcmachicao
commited on
Commit
•
ae6afbe
1
Parent(s):
3381eff
Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,10 @@ import streamlit as st
|
|
4 |
import pandas as pd
|
5 |
import base64
|
6 |
from pyxlsb import open_workbook as open_xlsb
|
|
|
7 |
from datetime import datetime
|
8 |
from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel
|
|
|
9 |
import bs4 as BeautifulSoup
|
10 |
import urllib.request
|
11 |
from urllib.request import urlopen, Request
|
@@ -16,29 +18,30 @@ with c2:
|
|
16 |
st.image('encopartslogo.jpg', width=300, caption='https://encoparts.com/')
|
17 |
|
18 |
rutas_websearch = ['https://en.hespareparts.com/search/?search=', 'https://offroadeq.com/parts-search/']
|
19 |
-
st.title('
|
20 |
-
st.subheader('
|
21 |
-
selec = st.radio('
|
22 |
items = None
|
23 |
|
24 |
if selec is None:
|
25 |
|
26 |
-
st.write('
|
27 |
|
28 |
else:
|
29 |
|
30 |
-
if selec == '
|
31 |
st.write(selec)
|
32 |
-
codigos = st.text_input('
|
33 |
-
if st.button('
|
34 |
items = list(codigos.split(','))
|
35 |
|
36 |
else:
|
37 |
st.write(selec)
|
38 |
-
file = st.file_uploader('
|
39 |
if file is not None:
|
40 |
-
|
41 |
-
|
|
|
42 |
namcol = codigosf.columns[0]
|
43 |
items = pd.Series(codigosf[namcol]).astype(str)
|
44 |
|
@@ -47,35 +50,39 @@ if selec is not None and items is not None:
|
|
47 |
st.write(items)
|
48 |
|
49 |
datos_tot = []
|
50 |
-
st.write('
|
51 |
for it in items:
|
52 |
-
extrae_med = extrae_web(it)
|
53 |
extrae_dat = extrae_dato_web(it)
|
54 |
itxx = it[:-4]+'-'+it[-4:]
|
55 |
-
datos = [it, itxx] + list(
|
|
|
56 |
datos_tot.append(datos)
|
57 |
|
58 |
dtdf = pd.DataFrame(datos_tot)
|
59 |
dtdf.columns = ['part_no_', 'part_no',
|
60 |
-
'descrip_en', 'length_m', 'width_m', 'height_m',
|
61 |
-
'
|
|
|
62 |
now = datetime.now()
|
63 |
date_time = now.strftime("%m/%d/%Y, %H:%M:%S").replace('/','_').replace(':','_').replace(', ', '_')
|
64 |
dtdf['peso_kg'] = dtdf.peso_lb*0.453592
|
65 |
|
66 |
-
dtdf2 = dtdf[['part_no_', 'part_no', 'descr',
|
|
|
|
|
|
|
67 |
|
68 |
df_xlsx = convierte_excel(dtdf2)
|
69 |
-
st.download_button(label='📩
|
70 |
-
file_name = '
|
71 |
-
|
72 |
csv = dtdf2.to_csv(index=False)
|
73 |
-
st.download_button(label='📩
|
74 |
-
file_name = 'df_'+date_time+'.csv')
|
75 |
|
76 |
else:
|
77 |
-
st.write('
|
78 |
|
79 |
c1, c2, c3 = st.columns([4,4,4])
|
80 |
with c3:
|
81 |
-
st.image('gdmklogo.png', width=100, caption='
|
|
|
4 |
import pandas as pd
|
5 |
import base64
|
6 |
from pyxlsb import open_workbook as open_xlsb
|
7 |
+
from io import BytesIO
|
8 |
from datetime import datetime
|
9 |
from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel
|
10 |
+
from funcs import encuentra_hoja
|
11 |
import bs4 as BeautifulSoup
|
12 |
import urllib.request
|
13 |
from urllib.request import urlopen, Request
|
|
|
18 |
st.image('encopartslogo.jpg', width=300, caption='https://encoparts.com/')
|
19 |
|
20 |
rutas_websearch = ['https://en.hespareparts.com/search/?search=', 'https://offroadeq.com/parts-search/']
|
21 |
+
st.title('Data Extraction')
|
22 |
+
st.subheader('Part Number Loading A')
|
23 |
+
selec = st.radio('Select: ', [None, 'Comma Separated Text', 'Excel File Loading'])
|
24 |
items = None
|
25 |
|
26 |
if selec is None:
|
27 |
|
28 |
+
st.write('Please select data loading method.')
|
29 |
|
30 |
else:
|
31 |
|
32 |
+
if selec == 'Comma Separated Text' and items is None:
|
33 |
st.write(selec)
|
34 |
+
codigos = st.text_input('Paste or write here the text, separated by commas: ')
|
35 |
+
if st.button('Proceed'):
|
36 |
items = list(codigos.split(','))
|
37 |
|
38 |
else:
|
39 |
st.write(selec)
|
40 |
+
file = st.file_uploader('Select an Excel File: ')
|
41 |
if file is not None:
|
42 |
+
hojax = encuentra_hoja(file)
|
43 |
+
codigosf = pd.read_excel(file, sheet_name=hojax)
|
44 |
+
st.write('Rows, Columns: ', codigosf.shape)
|
45 |
namcol = codigosf.columns[0]
|
46 |
items = pd.Series(codigosf[namcol]).astype(str)
|
47 |
|
|
|
50 |
st.write(items)
|
51 |
|
52 |
datos_tot = []
|
53 |
+
st.write('Please wait while data is being processed ...')
|
54 |
for it in items:
|
55 |
+
#extrae_med = extrae_web(it)
|
56 |
extrae_dat = extrae_dato_web(it)
|
57 |
itxx = it[:-4]+'-'+it[-4:]
|
58 |
+
datos = [it, itxx] + list(extrae_dat)
|
59 |
+
#list(extrae_med) +
|
60 |
datos_tot.append(datos)
|
61 |
|
62 |
dtdf = pd.DataFrame(datos_tot)
|
63 |
dtdf.columns = ['part_no_', 'part_no',
|
64 |
+
#'descrip_en', 'length_m', 'width_m', 'height_m',
|
65 |
+
#'vol_m3',' compatible',
|
66 |
+
'alternate', 'peso_lb', 'precio_bm_us', 'descr']
|
67 |
now = datetime.now()
|
68 |
date_time = now.strftime("%m/%d/%Y, %H:%M:%S").replace('/','_').replace(':','_').replace(', ', '_')
|
69 |
dtdf['peso_kg'] = dtdf.peso_lb*0.453592
|
70 |
|
71 |
+
dtdf2 = dtdf[['part_no_', 'part_no', 'descr',
|
72 |
+
#'compatible',
|
73 |
+
#'length_m', 'width_m', 'height_m', 'vol_m3',
|
74 |
+
'peso_kg', 'precio_bm_us', 'alternate']]
|
75 |
|
76 |
df_xlsx = convierte_excel(dtdf2)
|
77 |
+
st.download_button(label='📩 Download XLSX', data=df_xlsx,
|
78 |
+
file_name = 'df_test'+date_time+'.xlsx')
|
79 |
+
|
80 |
csv = dtdf2.to_csv(index=False)
|
81 |
+
st.download_button(label='📩 Download CSV', data=csv, file_name = 'extraccion_'+date_time+'.csv')
|
|
|
82 |
|
83 |
else:
|
84 |
+
st.write('Please select loading option, load and proceed.')
|
85 |
|
86 |
c1, c2, c3 = st.columns([4,4,4])
|
87 |
with c3:
|
88 |
+
st.image('gdmklogo.png', width=100, caption='Updated by GestioDinámica in 2023')
|