acrowth committed on
Commit
7531e94
·
1 Parent(s): 78a9a5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -0
app.py CHANGED
@@ -25,6 +25,7 @@ def findpdftype(file):
25
  ocr_df[float_cols] = ocr_df[float_cols].round(0).astype(int)
26
  ocr_df = ocr_df.replace(r'^\s*$', np.nan, regex=True)
27
  words = ' '.join([word for word in ocr_df.text if str(word) != 'nan'])
 
28
  results.append(words)
29
  results=pipe(results)
30
  return pd.DataFrame(results).label.value_counts().sort_values(ascending=False).index[0]
 
25
  ocr_df[float_cols] = ocr_df[float_cols].round(0).astype(int)
26
  ocr_df = ocr_df.replace(r'^\s*$', np.nan, regex=True)
27
  words = ' '.join([word for word in ocr_df.text if str(word) != 'nan'])
28
+ words = words[:1000]
29
  results.append(words)
30
  results=pipe(results)
31
  return pd.DataFrame(results).label.value_counts().sort_values(ascending=False).index[0]