# Spaces: Sleeping / Sleeping -- appears to be status text captured from the hosting page, not a build instruction
# Image for a uvicorn-served Python app (app:app on port 7860) that relies on
# Tesseract OCR and Poppler for PDF/image text extraction.
FROM python:3.9

# Build-time only: keeps apt from prompting. Declared as ARG (not ENV) so it
# does not leak into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# OS packages must be installed while still root: apt-get cannot take its
# locks or write its package lists as an unprivileged user.
# --allow-releaseinfo-change only accepts changed repository release metadata;
# it does not grant any permissions.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update --allow-releaseinfo-change && \
    apt-get install -y --no-install-recommends \
        libtesseract-dev \
        poppler-utils \
        tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Create the unprivileged runtime user (fixed UID 1000) and drop root for
# every remaining step.
RUN useradd -m -u 1000 user
USER user

# The pip installs below run as a non-root user; this puts the per-user
# script directory on PATH so installed entry points such as uvicorn resolve.
ENV PATH="/home/user/.local/bin:$PATH"

# Install required Python packages.
# The extras spec is quoted so the shell never treats [pdf] as a glob pattern.
RUN pip install --no-cache-dir \
    langchain \
    pdf2image \
    pytesseract \
    pillow \
    pymupdf \
    pypdf \
    "unstructured[pdf]"

# Set Tesseract OCR data path.
# NOTE(review): this path follows the Tesseract 4.x package layout; confirm it
# exists for the Tesseract version that the base image's distro installs.
ENV TESSDATA_PREFIX="/usr/share/tesseract-ocr/4.00/tessdata"

WORKDIR /app

# Copy the dependency manifest on its own first so the install layer below is
# reused from cache until requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Application source last: it changes most often.
COPY --chown=user . /app

# Documentation only; matches the port passed to uvicorn below.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]