jcmachicao committed on
Commit
faf1878
1 Parent(s): ae6afbe

Update funcs.py

Browse files
Files changed (1) hide show
  1. funcs.py +17 -6
funcs.py CHANGED
@@ -17,14 +17,17 @@ def extrae_dato_web(idx):
17
  lista0 = soup.find_all('h2')[0]
18
  lista1 = soup.find_all('dt')
19
  lista2 = soup.find_all('dd')
 
 
 
20
  if len(lista1)<3:
21
  lista1 = ['Alt NA'] + lista1
22
- lista2 = ['Alternate NA'] + lista2
23
  else:
24
  pass
25
- for i, j in zip(lista1, lista2):
26
  try:
27
- datx.append( float(j.text.replace('lbs', '').replace('$', '')) )
28
  except:
29
  datx.append(j)
30
  datx.append( lista0.text.split('-')[1:][0] )
@@ -88,7 +91,15 @@ def convierte_excel(df):
88
  worksheet = writer.sheets['data_extraida']
89
  format1 = workbook.add_format({'num_format': '0.00'})
90
  worksheet.set_column('A:A', None, format1)
91
- writer.save()
92
- processed_data = output.getvalue()
93
  writer.close()
94
- return processed_data
 
 
 
 
 
 
 
 
 
 
 
17
  lista0 = soup.find_all('h2')[0]
18
  lista1 = soup.find_all('dt')
19
  lista2 = soup.find_all('dd')
20
+
21
+ lista21 = [kj.text for kj in lista2]
22
+
23
  if len(lista1)<3:
24
  lista1 = ['Alt NA'] + lista1
25
+ lista21 = ['Alternate NA'] + lista21
26
  else:
27
  pass
28
+ for i, j in zip(lista1, lista21):
29
  try:
30
+ datx.append( float(j.replace('lbs', '').replace('$', '')) )
31
  except:
32
  datx.append(j)
33
  datx.append( lista0.text.split('-')[1:][0] )
 
91
  worksheet = writer.sheets['data_extraida']
92
  format1 = workbook.add_format({'num_format': '0.00'})
93
  worksheet.set_column('A:A', None, format1)
 
 
94
  writer.close()
95
+ processed_data = output.getvalue()
96
+ return processed_data
97
+
98
def encuentra_hoja(df):
    """Return the name of the worksheet that has exactly one column.

    Args:
        df: Despite its name, this is the workbook source that is handed to
            ``pd.ExcelFile`` (whatever that constructor accepts), not a
            DataFrame.

    Returns:
        The name of the last sheet, in workbook order, whose parsed table
        has exactly one column.

    Raises:
        ValueError: If no sheet in the workbook has exactly one column.
            (Previously this case surfaced as an ``UnboundLocalError``
            on the ``return`` statement.)
    """
    ds = None
    # Open the workbook once and parse every sheet from that same handle,
    # instead of re-opening the source for each sheet; the ``with`` block
    # releases the handle when the scan is finished.
    with pd.ExcelFile(df) as xls_file:
        for sn in xls_file.sheet_names:
            d = pd.read_excel(xls_file, sheet_name=sn)
            if len(d.columns) == 1:
                # Keep scanning: when several sheets qualify, the last one
                # wins, matching the original behavior.
                ds = sn
    if ds is None:
        raise ValueError("No worksheet with exactly one column was found")
    return ds