File size: 3,071 Bytes
b6c448a
 
 
 
 
 
ae6afbe
b6c448a
 
ae6afbe
b6c448a
 
 
 
 
 
 
 
 
 
ae6afbe
 
 
b6c448a
 
 
 
ae6afbe
b6c448a
 
 
ae6afbe
b6c448a
ae6afbe
 
b6c448a
 
 
 
ae6afbe
b6c448a
ae6afbe
 
 
b6c448a
 
 
 
 
 
 
 
ae6afbe
b6c448a
ae6afbe
b6c448a
 
ae6afbe
 
b6c448a
 
 
 
ae6afbe
 
 
b6c448a
 
 
 
ae6afbe
 
 
 
b6c448a
 
ae6afbe
 
 
b6c448a
ae6afbe
b6c448a
 
ae6afbe
b6c448a
 
 
ae6afbe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-

import streamlit as st
import pandas as pd
import base64
from pyxlsb import open_workbook as open_xlsb
from io import BytesIO
from datetime import datetime
from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel
from funcs import encuentra_hoja
import bs4 as BeautifulSoup
import urllib.request
from urllib.request import urlopen, Request
import re

# Page header: two equal-width columns, with the company logo rendered in the
# right-hand one (the left column is intentionally left empty).
_header_left, header_logo_col = st.columns([6, 6])
header_logo_col.image(
    'encopartslogo.jpg',
    width=300,
    caption='https://encoparts.com/',
)

# Search endpoints of the parts catalogues. Not referenced anywhere in this
# script. NOTE(review): presumably the helpers in `funcs` carry their own
# URLs - confirm before removing this list.
rutas_websearch = ['https://en.hespareparts.com/search/?search=', 'https://offroadeq.com/parts-search/']

st.title('Data Extraction')
st.subheader('Part Number Loading A')
selec = st.radio('Select: ', [None, 'Comma Separated Text', 'Excel File Loading'])

# `items` holds the part numbers to look up (a list of str or a pandas Series
# of str). It stays None until the user has supplied at least one usable
# code, which is what the processing step further down checks for.
items = None

if selec is None:

    st.write('Please select data loading method.')

elif selec == 'Comma Separated Text':

    st.write(selec)
    codigos = st.text_input('Paste or write here the text, separated by commas: ')
    if st.button('Proceed'):
        # Trim the whitespace around every code and drop empty entries so
        # that input such as "A1234, B5678," does not send ' B5678' or ''
        # to the web lookup.
        codigos_limpios = [cod.strip() for cod in codigos.split(',')]
        codigos_limpios = [cod for cod in codigos_limpios if cod]
        if codigos_limpios:
            items = codigos_limpios

else:

    # Only remaining radio option: 'Excel File Loading'.
    st.write(selec)
    file = st.file_uploader('Select an Excel File: ')
    if file is not None:
        hojax = encuentra_hoja(file)
        codigosf = pd.read_excel(file, sheet_name=hojax)
        st.write('Rows, Columns: ', codigosf.shape)
        # The part numbers are taken from the first column of the sheet.
        # Guard against a sheet without columns (columns[0] would raise).
        if len(codigosf.columns) > 0:
            namcol = codigosf.columns[0]
            # Drop blank cells *before* casting to str; otherwise they would
            # turn into the literal text 'nan' and be looked up as a code.
            codigos_xl = codigosf[namcol].dropna().astype(str).str.strip()
            codigos_xl = codigos_xl[codigos_xl != '']
            if not codigos_xl.empty:
                items = codigos_xl

# Processing step: look up every loaded part number, assemble the results in
# a DataFrame and offer it for download as XLSX and CSV.
#
# NOTE(review): Streamlit re-executes the whole script on every widget
# interaction, so pressing a download button runs this lookup loop again
# (and, in comma-text mode, 'Proceed' is no longer pressed on that rerun).
# Consider caching the results in st.session_state.
if selec is not None and items is not None and len(items) > 0:

    st.write(items)

    st.write('Please wait while data is being processed ...')
    datos_tot = []
    for it in items:
        # extrae_dato_web is expected to yield four values per part number,
        # matching the last four column names below. The extra dimension
        # columns from extrae_web are currently disabled.
        extrae_dat = extrae_dato_web(it)
        # Alternative spelling of the code: a dash before its last 4 chars.
        itxx = it[:-4] + '-' + it[-4:]
        datos_tot.append([it, itxx, *extrae_dat])

    # Naming the columns in the constructor (rather than assigning
    # `.columns` afterwards) also works when no rows were collected.
    dtdf = pd.DataFrame(
        datos_tot,
        columns=['part_no_', 'part_no',
                 'alternate', 'peso_lb', 'precio_bm_us', 'descr'],
    )

    # Timestamp used in the download file names, e.g. 05_01_2023_12_30_45.
    date_time = datetime.now().strftime('%m_%d_%Y_%H_%M_%S')

    # Pounds -> kilograms. Scraped weights may arrive as text or be missing;
    # coerce them to numbers (unparseable values become NaN) instead of
    # letting the multiplication raise a TypeError.
    dtdf['peso_kg'] = pd.to_numeric(dtdf['peso_lb'], errors='coerce') * 0.453592

    # Column selection and order of the exported table.
    dtdf2 = dtdf[['part_no_', 'part_no', 'descr',
                  'peso_kg', 'precio_bm_us', 'alternate']]

    df_xlsx = convierte_excel(dtdf2)
    st.download_button(label='📩 Download XLSX', data=df_xlsx,
                       file_name='df_test' + date_time + '.xlsx')

    csv = dtdf2.to_csv(index=False)
    st.download_button(label='📩 Download CSV', data=csv,
                       file_name='extraccion_' + date_time + '.csv')

else:
    st.write('Please select loading option, load and proceed.')

# Page footer: three equal-width columns, with the maintainer logo rendered
# in the last one (the first two columns stay empty).
*_footer_spacers, footer_logo_col = st.columns([4, 4, 4])
footer_logo_col.image(
    'gdmklogo.png',
    width=100,
    caption='Updated by GestioDinámica in 2023',
)