Spaces:
Sleeping
Sleeping
Update Dockerfile
Browse files- Dockerfile +7 -5
Dockerfile
CHANGED
@@ -7,13 +7,15 @@ ENV PATH="/home/user/.local/bin:$PATH"
|
|
7 |
# Set non-interactive mode to avoid user input issues
|
8 |
ENV DEBIAN_FRONTEND=noninteractive
|
9 |
|
10 |
-
#
|
11 |
-
RUN apt-get update
|
|
|
12 |
tesseract-ocr \
|
13 |
libtesseract-dev \
|
14 |
-
poppler-utils
|
|
|
15 |
|
16 |
-
# Install required Python
|
17 |
RUN pip install --no-cache-dir \
|
18 |
langchain \
|
19 |
pdf2image \
|
@@ -23,7 +25,7 @@ RUN pip install --no-cache-dir \
|
|
23 |
pypdf \
|
24 |
unstructured[pdf]
|
25 |
|
26 |
-
# Set
|
27 |
ENV TESSDATA_PREFIX="/usr/share/tesseract-ocr/4.00/tessdata"
|
28 |
|
29 |
WORKDIR /app
|
|
|
7 |
# Set non-interactive mode to avoid user input issues
|
8 |
ENV DEBIAN_FRONTEND=noninteractive
|
9 |
|
10 |
+
# Pass --allow-releaseinfo-change so apt-get update still succeeds when a repository's Release metadata has changed
|
11 |
+
RUN apt-get update --allow-releaseinfo-change && \
|
12 |
+
apt-get install -y --no-install-recommends \
|
13 |
tesseract-ocr \
|
14 |
libtesseract-dev \
|
15 |
+
poppler-utils && \
|
16 |
+
rm -rf /var/lib/apt/lists/* # Clean up
|
17 |
|
18 |
+
# Install required Python packages
|
19 |
RUN pip install --no-cache-dir \
|
20 |
langchain \
|
21 |
pdf2image \
|
|
|
25 |
pypdf \
|
26 |
unstructured[pdf]
|
27 |
|
28 |
+
# Set Tesseract OCR path
|
29 |
ENV TESSDATA_PREFIX="/usr/share/tesseract-ocr/4.00/tessdata"
|
30 |
|
31 |
WORKDIR /app
|