FROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git && \
rm -rf /var/lib/apt/lists/*
# Install uv tool
RUN pip install --no-cache-dir uv
# Copy the source
COPY . .
# Install project in editable mode and setup
RUN pip install --no-cache-dir --system -e . && \
crawl4ai-setup
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
# Ignore VCS and common build artifacts .git .gitignore __pycache__/ **/*.pyc **/*.pyo *.pyd *.pyc env/ ENV/ venv/ ENV3/ *.env *.venv build/ dist/ *.egg-info/ *.pytest_cache/ .pytest_cache/ .mypy_cache/ *.DS_Store node_modules/ logs/
Concerns: RUN pip install --no-cache-dir --system -e . uses an invalid pip option --system which will likely fail the build., crawl4ai-setup is invoked in the same RUN step without a clear guarantee that the executable is available after editable install; this may cause build-time failures., COPY . . may transfer files not intended for production (e.g., local docs or secrets) depending on .dockerignore; not a failure per se but a potential risk.
FROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git && \
rm -rf /var/lib/apt/lists/*
# Install uv tool (if available in environment). Fallback to plain pip for dependencies.
RUN pip install --no-cache-dir uv
# Copy the source
COPY . .
# Install project in editable mode and setup
RUN pip install --no-cache-dir -e . && \
crawl4ai-setup
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Build timed out after 600s
FROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git && \
rm -rf /var/lib/apt/lists/*
# Install uv tool (if available in environment). Fallback to plain pip for dependencies.
RUN pip install --no-cache-dir uv
# Copy the source
COPY . .
# Install project dependencies from pyproject.toml in editable mode
RUN pip install --no-cache-dir -e .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Concerns: The line 'RUN pip install --no-cache-dir uv' will fail if the PyPI package 'uv' does not exist or cannot be built; there is no fallback, so the image build may fail., Using 'COPY . .' may copy files not intended for the image (e.g., local config, tests, secrets); consider adding a .dockerignore., Installing build-essential and git may be heavier than needed for runtime; ensure these are truly required for building the package from source.
crawl4ai_mcp.egg-info __pycache__ .venv .env
FROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git && \
rm -rf /var/lib/apt/lists/*
# Copy pyproject first to leverage docker cache for dependencies
COPY pyproject.toml ./
# Install project dependencies from pyproject.toml in editable mode
RUN pip install --no-cache-dir -e .
# Copy the rest of the source
COPY . .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Concerns: pip install -e . runs before copying the source, so there is nothing to install and the build will fail; reorder COPY . . to copy code before installation, ARG PORT is build-time only and is not exposed at runtime; consider using ENV PORT to configure the running app's port, No explicit HEALTHCHECK or runtime verification; the current Dockerfile relies on the build/install step but lacks a runtime healthcheck
FROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Copy pyproject first to leverage docker cache for dependencies
COPY pyproject.toml ./
# Install project dependencies from pyproject.toml in editable mode
RUN pip install --no-cache-dir -e .
# Copy the rest of the source
COPY . .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Concerns: Editable install (-e .) relies on packaging configuration in pyproject.toml and a compatible build backend; if the project uses Poetry or another tool without setuptools/PEP 660 support, the editable install may fail., python:3.12-slim may lack build tools; if dependencies require compilation (native extensions), pip install could fail without adding system packages (e.g., build-essential) to the image., The application must bind to 0.0.0.0 to be accessible when port-mapped; EXPOSE only documents the port, but the runtime must listen on all interfaces for external access.
Smoke [PASS]: test -f /app/src/crawl4ai_mcp.py
Smoke [FAIL]: python -c 'import crawl4ai_mcp; print("import-ok")
Output: sh: 1: Syntax error: Unterminated quoted string
Smoke [PASS]: python -VActionable summary (under 1500 chars) - Purpose: MCP server "mcp-crawl4ai-rag" for web crawling with Crawl4AI, RAG, AI hallucination detection, and Neo4j knowledge-graph repository parsing. Uses Supabase for storage. - Core components/libraries: - FastMCP, AsyncWebCrawler, CrossEncoder, AIScriptAnalyzer, HallucinationReporter - Knowledge graph: KnowledgeGraphValidator, DirectNeo4jExtractor - Storage: Supabase client - Code utilities: generate_code_example_summary, extract_code_blocks, update_source_info, extract_source_summary - Path hookup: knowledge_graphs path added to sys.path; dotenv loading from project_root/.env - Environment/config flags (examples): - USE_RERANKING (default false): load CrossEncoder reranker - USE_KNOWLEDGE_GRAPH (default false): enable Neo4j components - NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD: Neo4j credentials - USE_AGENTIC_RAG (default false): enable code-example extraction - USE_HYBRID_SEARCH (default false): hybrid vector+keyword search - TRANSPORT: 'sse' or stdio for MCP server - HOST/PORT, etc. - Lifespan management: - crawl4ai_lifespan creates headless browser (BrowserConfig headless), initializes crawler, Supabase client, optional reranking model, and optional knowledge-graph components (validator and repo_extractor) if enabled. - On exit, closes components safely. - Key tools exposed (examples): - crawl_single_page(ctx, url): crawl one page, chunk content, store in Supabase, optionally extract and store code examples - smart_crawl_url(ctx, url, max_depth, max_concurrent, chunk_size): auto-detects sitemap/.txt/web page; stores chunks; returns crawl summary - get_available_sources(ctx): lists available sources from DB - perform_rag_query(ctx, query, source, match_count): vector/hybrid RAG query - search_code_examples(ctx, query, source_id, match_count): code example search (if enabled) - check_ai_script_hallucinations(ctx, script_path): validate script with knowledge graph - query_knowledge_graph(ctx, command): Neo4j knowledge-graph explorer (repos, explore, classes, class, method, query) - parse_github_repository(ctx, repo_url): clone/analyze GitHub repo; stores stats in Neo4j - parse_* helpers: crawl_markdown_file, crawl_batch, crawl_recursive_internal_links - main(): runs SDP or stdio transport based on TRANSPORT - Knowledge-graph command surface (as in query_knowledge_graph): - repos, explore <repo>, classes [<repo>], class <name>, method <name> [<class>], query <cypher> - Data flow and storage: - For crawls: generate chunks via smart_chunk_markdown; gather metadata; update_source_info first; then add_documents_to_supabase - Code examples: extracted in parallel (ThreadPoolExecutor) if USE_AGENTIC_RAG=true; stored via add_code_examples_to_supabase - Source summaries generated with extract_source_summary and stored - Notable helpers/logic: - is_sitemap, is_txt, parse_sitemap for crawl routing - smart_chunk_markdown respects code blocks, paragraphs, and sentences - validate_neo4j_connection and format_neo4j_error for user-friendly Neo4j errors - Error handling (examples): - Neo4j: “Neo4j authentication failed. Check NEO4J_USER and NEO4J_PASSWORD.”; “Cannot connect to Neo4j. Check NEO4J_URI and ensure Neo4j is running.” - GitHub repo: “Repository URL must start with https:// or git@” and “Repository URL is required” - Script path: “Script path is required”; “Cannot read script file: ...” - Run details: - Entry point: asyncio.run(main()) - Environment-driven behavior predominates (feature flags and Neo4j/Supabase wiring)
FROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
ca-certificates \
git && \
rm -rf /var/lib/apt/lists/*
# Install Python dependencies (pinned to known versions from pyproject)
RUN pip install --no-cache-dir \
crawl4ai==0.6.2 \
mcp==1.7.1 \
supabase==2.15.1 \
openai==1.71.0 \
dotenv==0.9.9 \
sentence-transformers>=4.1.0 \
neo4j>=5.28.1
# Copy project metadata first to enable docker cache for dependencies
COPY pyproject.toml ./
# Copy the rest of the source
COPY . .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Build timed out after 600s
FROM python:3.12-slim
ARG PORT=8051
WORKDIR /app
# Install uv helper (used by crawl4ai for installation)
RUN pip install uv
# Copy the MCP server files
COPY . .
# Install project dependencies from source using uv, in editable mode, then run setup if provided
RUN uv pip install -e . && \
crawl4ai-setup
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
- Exact error message and exit code: error: No virtual environment found; run `uv venv` to create an environment, or pass `--system` to install into a non-virtual environment Exit code: 2 - Failing command/step: [5/5] RUN uv pip install -e . && crawl4ai-setup - Missing packages or files mentioned: No virtual environment found (uv expected a venv). You need to create one with `uv venv` or install system-wide with `--system`. - Version mismatch info (if any): - uv-0.11.6 was installed earlier in the build - Base image used: python:3.12-slim No explicit version mismatch reported beyond the uv virtual env requirement.
FROM python:3.12-slim
ARG PORT=8051
WORKDIR /app
# System dependencies for building and running
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies directly (system-wide)
RUN pip install --no-cache-dir \
crawl4ai==0.6.2 \
mcp==1.7.1 \
supabase==2.15.1 \
openai==1.71.0 \
dotenv==0.9.9 \
sentence-transformers>=4.1.0 \
neo4j>=5.28.1
# Copy the MCP server files
COPY . .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Concerns: COPY . . may copy large or sensitive files if a .dockerignore is not used; consider adding a .dockerignore to limit context., Using system-wide pip install in a slim image can lead to a larger image size and potential isolation issues; could consider a virtualenv or user-level install, but not strictly necessary.
Smoke [PASS]: test -f /app/src/crawl4ai_mcp.py
Smoke [FAIL]: python -c "import crawl4ai, mcp, openai, dotenv, sentence_transformers, neo4j; print('imports ok')
Output: sh: 1: Syntax error: Unterminated quoted string
Smoke [FAIL]: python -c "import sys; print(sys.version)
Output: sh: 1: Syntax error: Unterminated quoted stringFROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies needed for builds
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git && \
rm -rf /var/lib/apt/lists/*
# Copy pyproject to leverage Docker cache for dependencies
COPY pyproject.toml ./
# Install Python dependencies (from pyproject) in editable mode
RUN pip install --no-cache-dir -e .
# Copy the rest of the source code
COPY . .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Concerns: Editable install with 'pip install -e .' relies on the project being packaged for editable installs (setup.py or pyproject with proper build-system). If the project uses Poetry exclusively, dependencies may not install as expected., No non-root user is configured; the image installs build tools and runs as root, which may have security implications. Consider adding a non-root user and dropping privileges for runtime.
Smoke [PASS]: test -f /app/src/crawl4ai_mcp.py
Smoke [FAIL]: python -V && python -c "print('python-ok')
Output: sh: 1: Syntax error: Unterminated quoted string
Smoke [PASS]: python -m py_compile /app/src/crawl4ai_mcp.pyFROM python:3.12-slim
ARG PORT=8051
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies for builds
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
ca-certificates \
git \
libffi-dev \
libssl-dev && \
rm -rf /var/lib/apt/lists/*
# Install Python dependencies from PyPI (matching pyproject.toml)
RUN pip install --no-cache-dir \
crawl4ai==0.6.2 \
mcp==1.7.1 \
supabase==2.15.1 \
openai==1.71.0 \
dotenv==0.9.9 \
sentence-transformers>=4.1.0 \
neo4j>=5.28.1
# Copy the source code
COPY . .
EXPOSE ${PORT}
CMD ["python", "src/crawl4ai_mcp.py"]
Concerns: COPY . . after dependency installation is fine for caching, but if the code imports change, ensure the entrypoint remains correct; no critical break here. Smoke [PASS]: pgrep -f crawl4ai_mcp.py >/dev/null 2>&1 && echo ok || echo not_running Smoke [PASS]: ss -ltnp 2>/dev/null | grep -m1 ':8051' >/dev/null && echo listening || echo not_listening Smoke [PASS]: curl -sS --max-time 2 http://127.0.0.1:8051/health || curl -sS --max-time 2 http://127.0.0.1:8051/ || echo no_http