Spaces:
Sleeping
Sleeping
Update Dockerfile
Browse files- Dockerfile +21 -1
Dockerfile
CHANGED
@@ -4,7 +4,27 @@ RUN useradd -m -u 1000 user
|
|
4 |
USER user
|
5 |
ENV PATH="/home/user/.local/bin:$PATH"
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
WORKDIR /app
|
10 |
|
|
|
4 |
USER user
|
5 |
ENV PATH="/home/user/.local/bin:$PATH"
|
6 |
|
7 |
+
# Set non-interactive mode to avoid user input issues
|
8 |
+
# Build-time only: ARG values are exposed to the RUN steps that follow but,
# unlike ENV, are not persisted into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
|
9 |
+
|
10 |
+
# Install required dependencies
|
11 |
+
# Package installation needs root, but the image has already switched to the
# unprivileged "user" account; elevate for this step only.
USER root
# update + install share one layer so a stale cached package index is never
# reused; the index is removed in the same layer so it does not bloat the image.
RUN apt-get update && apt-get install -y \
    libtesseract-dev \
    poppler-utils \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Drop back to the unprivileged account for everything that follows.
USER user
|
15 |
+
|
16 |
+
# Install required Python libraries
|
17 |
+
# Python-side PDF/OCR libraries. --no-cache-dir keeps pip's download cache out
# of the layer. The extras specifier is quoted so the shell can never treat
# "[pdf]" as a glob pattern and expand it against files in the working directory.
RUN pip install --no-cache-dir \
    langchain \
    pdf2image \
    pillow \
    pymupdf \
    pypdf \
    pytesseract \
    "unstructured[pdf]"
|
25 |
+
|
26 |
+
# Set the Tesseract OCR path (optional, in case it's not found)
|
27 |
+
# NOTE(review): this directory is version-specific ("4.00"). Confirm it exists
# in the tesseract-ocr package shipped by the base image; a prefix pointing at
# a missing directory would presumably stop Tesseract from finding its
# language data — verify before relying on it.
ENV TESSDATA_PREFIX="/usr/share/tesseract-ocr/4.00/tessdata"
|
28 |
|
29 |
WORKDIR /app
|
30 |
|