SoumyaJ committed on
Commit
1177087
·
verified ·
1 Parent(s): 7fe3c8f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +21 -1
Dockerfile CHANGED
@@ -4,7 +4,27 @@ RUN useradd -m -u 1000 user
4
  USER user
5
  ENV PATH="/home/user/.local/bin:$PATH"
6
 
7
- RUN apt-get update && apt-get install -y tesseract-ocr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  WORKDIR /app
10
 
 
4
  USER user
5
  ENV PATH="/home/user/.local/bin:$PATH"
6
 
7
+ # Set non-interactive mode to avoid user input issues
8
+ ENV DEBIAN_FRONTEND=noninteractive
9
+
10
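+ # Install required system dependencies (NOTE(review): apt-get needs root, but USER user is set above — confirm this step runs as root)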
11
+ RUN apt-get update && apt-get install -y \
12
+ tesseract-ocr \
13
+ libtesseract-dev \
14
+ poppler-utils
15
+
16
+ # Install required Python libraries
17
+ RUN pip install --no-cache-dir \
18
+ langchain \
19
+ pdf2image \
20
+ pytesseract \
21
+ pillow \
22
+ pymupdf \
23
+ pypdf \
24
+ unstructured[pdf]
25
+
26
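+ # Set the Tesseract language-data (tessdata) directory (optional, in case it's not found; the path is version-specific — verify it matches the installed Tesseract)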
27
+ ENV TESSDATA_PREFIX="/usr/share/tesseract-ocr/4.00/tessdata"
28
 
29
  WORKDIR /app
30