From c4ebd6ace149e4a417234552c9d17d6e70da2d00 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 30 Jun 2026 06:50:15 +0000 Subject: [PATCH 1/4] fix: Pin playwright to base image version for pip and poetry templates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The project template Dockerfile relied on a `sed` substitution to align the installed playwright version with the browser binaries baked into the `apify/actor-python-playwright*` base images. That substitution only rewrites a line starting with `playwright==`, but neither `requirements.txt` (pip) nor the exported `pyproject.toml`/`poetry.lock` reliably pin playwright directly — it is pulled in transitively via `crawlee[playwright]`. As a result pip installed the latest playwright release, replacing the version that matches the base image's browser binaries and causing a protocol mismatch (e.g. `Browser.setDefaultViewport` errors with camoufox). Replace the fragile `sed` trick in the pip and poetry branches with the same explicit pin already used by the uv branch: snapshot the base image's pre-installed playwright version, then force-reinstall that exact version after dependency installation. Non-playwright base images have no playwright pre-installed, so the pin is skipped there. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_012xSK5xwEhCUXbAeC9SVf29 --- .../{{cookiecutter.project_name}}/Dockerfile | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile index d5f5e928c1..70cf3dc97e 100644 --- a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +++ b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile @@ -31,13 +31,19 @@ COPY pyproject.toml poetry.lock ./ # Install the dependencies RUN echo "Python version:" \ && python --version \ + # Snapshot the base image's playwright version before installing so we can pin to it later + # and stay aligned with the browser binaries baked into the base image (the `hash` checks if + # playwright is installed; it is empty for non-playwright base images). + && PLAYWRIGHT_BASE_VERSION=$(hash playwright 2>/dev/null && (playwright --version | cut -d ' ' -f 2) || echo "") \ && echo "Installing dependencies:" \ - # Export packages from poetry.lock - && poetry export -f requirements.txt --without-hashes | \ - # Replace playwright version so that it matches whatever is pre-installed in the image (the `hash` checks if playwright is installed) - sed "s/^playwright==\(.*\)/playwright==$(hash playwright 2>/dev/null && (playwright --version | cut -d ' ' -f 2) || echo '\1')/" | \ - # Install everything using pip (ignore dependency checks - the lockfile is correct, period) - pip install -r /dev/stdin --no-dependencies \ + # Export packages from poetry.lock and install everything using pip + # (ignore dependency checks - the lockfile is correct, period) + && poetry export -f requirements.txt --without-hashes | pip install -r /dev/stdin --no-dependencies \ + # Pin playwright to whatever is pre-installed in the base image so it matches the browser binaries. 
+ && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ + echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ + && pip install --no-deps --force-reinstall "playwright==$PLAYWRIGHT_BASE_VERSION"; \ + fi \ && echo "All installed Python packages:" \ && pip freeze # % elif cookiecutter.package_manager == 'uv' @@ -74,13 +80,19 @@ COPY requirements.txt ./ # Install the dependencies RUN echo "Python version:" \ && python --version \ + # Snapshot the base image's playwright version before installing so we can pin to it later + # and stay aligned with the browser binaries baked into the base image (the `hash` checks if + # playwright is installed; it is empty for non-playwright base images). + && PLAYWRIGHT_BASE_VERSION=$(hash playwright 2>/dev/null && (playwright --version | cut -d ' ' -f 2) || echo "") \ && echo "Installing dependencies:" \ - # Install everything using pip, set playwright version so that it matches whatever is pre-installed in the image - && cat requirements.txt | \ - # Replace playwright version so that it matches whatever is pre-installed in the image (the `hash` checks if playwright is installed) - sed "s/^playwright==\(.*\)/playwright==$(hash playwright 2>/dev/null && (playwright --version | cut -d ' ' -f 2) || echo '\1')/" | \ - # Install everything using pip - pip install -r /dev/stdin \ + # Install everything using pip. Playwright is pulled in transitively via `crawlee[playwright]` + # and is not pinned in requirements.txt, so pip resolves the latest release here. + && pip install -r requirements.txt \ + # Pin playwright to whatever is pre-installed in the base image so it matches the browser binaries. 
+ && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ + echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ + && pip install --no-deps --force-reinstall "playwright==$PLAYWRIGHT_BASE_VERSION"; \ + fi \ && echo "All installed Python packages:" \ && pip freeze # % elif cookiecutter.package_manager == 'manual' From 3459e14f75c0cdc50cceae60def9d89e890179ce Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 30 Jun 2026 07:08:34 +0000 Subject: [PATCH 2/4] fix: Snapshot base playwright version before dependency install for pip/poetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous attempt captured the base image's playwright version inside the dependency-install RUN. But the e2e template test patches the generated project (tests/e2e/project_template/utils.py) by injecting a `pip install ./crawlee-*.whl[playwright,...] --force-reinstall` step right after `COPY requirements.txt ./` / `COPY pyproject.toml ...`. That injected step resolves the unpinned playwright to the latest release *before* the snapshot ran, so the version captured (and pinned to) was the already-upgraded one — leaving the camoufox browser binary protocol mismatch unfixed. Move the snapshot into its own RUN placed before the COPY (and thus before any injected install step), writing it to /tmp/.base-playwright-version and reading it back when pinning — mirroring exactly what the working uv branch already does. This guarantees the captured version is the one baked into the base image and matching its browser binaries. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_012xSK5xwEhCUXbAeC9SVf29 --- .../{{cookiecutter.project_name}}/Dockerfile | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile index 70cf3dc97e..345bc135c6 100644 --- a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +++ b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile @@ -23,6 +23,12 @@ RUN pip install -U pip setuptools \ && pip install 'poetry<3' \ && poetry self add 'poetry-plugin-export' +# Snapshot the base image's playwright version before installing dependencies so we can pin to it +# later and stay aligned with the browser binaries baked into the base image. This must run before +# any dependency install (the `hash` checks if playwright is installed; the file is empty for +# non-playwright base images). +RUN hash playwright 2>/dev/null && playwright --version | cut -d ' ' -f 2 > /tmp/.base-playwright-version || true + # Second, copy just poetry.lock and pyproject.toml into the Actor image, # since those should be the only files that affects the dependency install in the next step, # in order to speed up the build @@ -31,19 +37,17 @@ COPY pyproject.toml poetry.lock ./ # Install the dependencies RUN echo "Python version:" \ && python --version \ - # Snapshot the base image's playwright version before installing so we can pin to it later - # and stay aligned with the browser binaries baked into the base image (the `hash` checks if - # playwright is installed; it is empty for non-playwright base images). 
- && PLAYWRIGHT_BASE_VERSION=$(hash playwright 2>/dev/null && (playwright --version | cut -d ' ' -f 2) || echo "") \ && echo "Installing dependencies:" \ # Export packages from poetry.lock and install everything using pip # (ignore dependency checks - the lockfile is correct, period) && poetry export -f requirements.txt --without-hashes | pip install -r /dev/stdin --no-dependencies \ # Pin playwright to whatever is pre-installed in the base image so it matches the browser binaries. + && PLAYWRIGHT_BASE_VERSION=$(cat /tmp/.base-playwright-version 2>/dev/null || echo "") \ && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ && pip install --no-deps --force-reinstall "playwright==$PLAYWRIGHT_BASE_VERSION"; \ fi \ + && rm -f /tmp/.base-playwright-version \ && echo "All installed Python packages:" \ && pip freeze # % elif cookiecutter.package_manager == 'uv' @@ -72,6 +76,12 @@ RUN echo "Python version:" \ # % elif cookiecutter.package_manager == 'pip' RUN pip install -U pip setuptools +# Snapshot the base image's playwright version before installing dependencies so we can pin to it +# later and stay aligned with the browser binaries baked into the base image. This must run before +# any dependency install (the `hash` checks if playwright is installed; the file is empty for +# non-playwright base images). 
+RUN hash playwright 2>/dev/null && playwright --version | cut -d ' ' -f 2 > /tmp/.base-playwright-version || true + # Second, copy just requirements.txt into the Actor image, # since it should be the only file that affects the dependency install in the next step, # in order to speed up the build @@ -80,19 +90,17 @@ COPY requirements.txt ./ # Install the dependencies RUN echo "Python version:" \ && python --version \ - # Snapshot the base image's playwright version before installing so we can pin to it later - # and stay aligned with the browser binaries baked into the base image (the `hash` checks if - # playwright is installed; it is empty for non-playwright base images). - && PLAYWRIGHT_BASE_VERSION=$(hash playwright 2>/dev/null && (playwright --version | cut -d ' ' -f 2) || echo "") \ && echo "Installing dependencies:" \ # Install everything using pip. Playwright is pulled in transitively via `crawlee[playwright]` # and is not pinned in requirements.txt, so pip resolves the latest release here. && pip install -r requirements.txt \ # Pin playwright to whatever is pre-installed in the base image so it matches the browser binaries. 
+ && PLAYWRIGHT_BASE_VERSION=$(cat /tmp/.base-playwright-version 2>/dev/null || echo "") \ && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ && pip install --no-deps --force-reinstall "playwright==$PLAYWRIGHT_BASE_VERSION"; \ fi \ + && rm -f /tmp/.base-playwright-version \ && echo "All installed Python packages:" \ && pip freeze # % elif cookiecutter.package_manager == 'manual' From c25bd1437aa8acb7baca9d7f869b5aae45abbd52 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 30 Jun 2026 11:57:38 +0200 Subject: [PATCH 3/4] Pin playwright via a single playwright_version template variable --- .../{{cookiecutter.project_name}}/Dockerfile | 66 +++++++------------ 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile index 7205b71c38..ac4d73c80e 100644 --- a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +++ b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile @@ -1,16 +1,22 @@ # First, specify the base Docker image. # You can see the Docker images from Apify at https://hub.docker.com/r/apify/. # You can also use any other image from Docker Hub. + +# The Playwright version baked into the Apify Playwright base images. Keep this in sync with the +# `playwright` version resolved by the lockfile so the installed package matches the browser +# binaries shipped in the base image. 
+# % set playwright_version = '1.60.0' + # % if cookiecutter.crawler_type == 'playwright' or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' -# % set base_image = 'apify/actor-python-playwright:3.13-1.60.0' +# % set base_image = 'apify/actor-python-playwright:3.13-' ~ playwright_version # % elif cookiecutter.crawler_type == 'playwright-camoufox' -# % set base_image = 'apify/actor-python-playwright-camoufox:3.13-1.60.0' +# % set base_image = 'apify/actor-python-playwright-camoufox:3.13-' ~ playwright_version # % elif cookiecutter.crawler_type == 'playwright-chrome' -# % set base_image = 'apify/actor-python-playwright-chrome:3.13-1.60.0' +# % set base_image = 'apify/actor-python-playwright-chrome:3.13-' ~ playwright_version # % elif cookiecutter.crawler_type == 'playwright-firefox' -# % set base_image = 'apify/actor-python-playwright-firefox:3.13-1.60.0' +# % set base_image = 'apify/actor-python-playwright-firefox:3.13-' ~ playwright_version # % elif cookiecutter.crawler_type == 'playwright-webkit' -# % set base_image = 'apify/actor-python-playwright-webkit:3.13-1.60.0' +# % set base_image = 'apify/actor-python-playwright-webkit:3.13-' ~ playwright_version # % else # % set base_image = 'apify/actor-python:3.13' # % endif @@ -23,12 +29,6 @@ RUN pip install -U pip setuptools \ && pip install 'poetry<3' \ && poetry self add 'poetry-plugin-export' -# Snapshot the base image's playwright version before installing dependencies so we can pin to it -# later and stay aligned with the browser binaries baked into the base image. This must run before -# any dependency install (the `hash` checks if playwright is installed; the file is empty for -# non-playwright base images). 
-RUN hash playwright 2>/dev/null && playwright --version | cut -d ' ' -f 2 > /tmp/.base-playwright-version || true - # Second, copy just poetry.lock and pyproject.toml into the Actor image, # since those should be the only files that affects the dependency install in the next step, # in order to speed up the build @@ -41,22 +41,15 @@ RUN echo "Python version:" \ # Export packages from poetry.lock and install everything using pip # (ignore dependency checks - the lockfile is correct, period) && poetry export -f requirements.txt --without-hashes | pip install -r /dev/stdin --no-dependencies \ - # Pin playwright to whatever is pre-installed in the base image so it matches the browser binaries. - && PLAYWRIGHT_BASE_VERSION=$(cat /tmp/.base-playwright-version 2>/dev/null || echo "") \ - && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ - echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ - && pip install --no-deps --force-reinstall "playwright==$PLAYWRIGHT_BASE_VERSION"; \ - fi \ - && rm -f /tmp/.base-playwright-version \ && echo "All installed Python packages:" \ && pip freeze +# % if cookiecutter.crawler_type.startswith('playwright') or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' +# Pin playwright to the version baked into the base image so it matches the browser binaries. +RUN pip install --no-deps --force-reinstall "playwright=={{ playwright_version }}" +# % endif # % elif cookiecutter.package_manager == 'uv' COPY --from=ghcr.io/astral-sh/uv:0.11 /uv /uvx /bin/ -# Snapshot the base image's playwright version before uv sync so we can pin to it later -# and stay aligned with the browser binaries baked into /pw-browsers. 
-RUN hash playwright 2>/dev/null && playwright --version | cut -d ' ' -f 2 > /tmp/.base-playwright-version || true - ENV UV_PROJECT_ENVIRONMENT="/usr/local" COPY pyproject.toml uv.lock ./ @@ -65,23 +58,15 @@ RUN echo "Python version:" \ && python --version \ && echo "Installing dependencies:" \ && uv sync --frozen --no-install-project --no-editable --quiet --no-dev --inexact \ - && PLAYWRIGHT_BASE_VERSION=$(cat /tmp/.base-playwright-version 2>/dev/null || echo "") \ - && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ - echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ - && uv pip install --system --reinstall --no-deps "playwright==$PLAYWRIGHT_BASE_VERSION"; \ - fi \ - && rm -f /tmp/.base-playwright-version \ && echo "All installed Python packages:" \ && pip freeze +# % if cookiecutter.crawler_type.startswith('playwright') or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' +# Pin playwright to the version baked into the base image so it matches the browser binaries. +RUN uv pip install --system --reinstall --no-deps "playwright=={{ playwright_version }}" +# % endif # % elif cookiecutter.package_manager == 'pip' RUN pip install -U pip setuptools -# Snapshot the base image's playwright version before installing dependencies so we can pin to it -# later and stay aligned with the browser binaries baked into the base image. This must run before -# any dependency install (the `hash` checks if playwright is installed; the file is empty for -# non-playwright base images). 
-RUN hash playwright 2>/dev/null && playwright --version | cut -d ' ' -f 2 > /tmp/.base-playwright-version || true - # Second, copy just requirements.txt into the Actor image, # since it should be the only file that affects the dependency install in the next step, # in order to speed up the build @@ -91,18 +76,13 @@ COPY requirements.txt ./ RUN echo "Python version:" \ && python --version \ && echo "Installing dependencies:" \ - # Install everything using pip. Playwright is pulled in transitively via `crawlee[playwright]` - # and is not pinned in requirements.txt, so pip resolves the latest release here. && pip install -r requirements.txt \ - # Pin playwright to whatever is pre-installed in the base image so it matches the browser binaries. - && PLAYWRIGHT_BASE_VERSION=$(cat /tmp/.base-playwright-version 2>/dev/null || echo "") \ - && if [ -n "$PLAYWRIGHT_BASE_VERSION" ]; then \ - echo "Pinning playwright to $PLAYWRIGHT_BASE_VERSION to match the browser binaries shipped in the base image" \ - && pip install --no-deps --force-reinstall "playwright==$PLAYWRIGHT_BASE_VERSION"; \ - fi \ - && rm -f /tmp/.base-playwright-version \ && echo "All installed Python packages:" \ && pip freeze +# % if cookiecutter.crawler_type.startswith('playwright') or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' +# Pin playwright to the version baked into the base image so it matches the browser binaries. 
+RUN pip install --no-deps --force-reinstall "playwright=={{ playwright_version }}" +# % endif # % elif cookiecutter.package_manager == 'manual' # TODO install dependencies # % endif From e6080d39ba7fea1d02c2712c7bbce8609c6c6dcd Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 1 Jul 2026 10:36:14 +0200 Subject: [PATCH 4/4] Address review comments: Jinja comment, uses_playwright_base_image flag --- .../{{cookiecutter.project_name}}/Dockerfile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile index ac4d73c80e..dcab0ea94c 100644 --- a/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile +++ b/src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile @@ -2,9 +2,9 @@ # You can see the Docker images from Apify at https://hub.docker.com/r/apify/. # You can also use any other image from Docker Hub. -# The Playwright version baked into the Apify Playwright base images. Keep this in sync with the -# `playwright` version resolved by the lockfile so the installed package matches the browser -# binaries shipped in the base image. +{# The Playwright version baked into the Apify Playwright base images. Keep this in sync with the + `playwright` version resolved by the lockfile so the installed package matches the browser + binaries shipped in the base image. 
#} # % set playwright_version = '1.60.0' # % if cookiecutter.crawler_type == 'playwright' or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' @@ -20,6 +20,7 @@ # % else # % set base_image = 'apify/actor-python:3.13' # % endif +# % set uses_playwright_base_image = 'playwright' in base_image FROM {{ base_image }} RUN apt update && apt install -yq git && rm -rf /var/lib/apt/lists/* @@ -43,7 +44,7 @@ RUN echo "Python version:" \ && poetry export -f requirements.txt --without-hashes | pip install -r /dev/stdin --no-dependencies \ && echo "All installed Python packages:" \ && pip freeze -# % if cookiecutter.crawler_type.startswith('playwright') or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' +# % if uses_playwright_base_image # Pin playwright to the version baked into the base image so it matches the browser binaries. RUN pip install --no-deps --force-reinstall "playwright=={{ playwright_version }}" # % endif @@ -60,7 +61,7 @@ RUN echo "Python version:" \ && uv sync --frozen --no-install-project --no-editable --quiet --no-dev --inexact \ && echo "All installed Python packages:" \ && pip freeze -# % if cookiecutter.crawler_type.startswith('playwright') or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' +# % if uses_playwright_base_image # Pin playwright to the version baked into the base image so it matches the browser binaries. 
RUN uv pip install --system --reinstall --no-deps "playwright=={{ playwright_version }}" # % endif @@ -79,7 +80,7 @@ RUN echo "Python version:" \ && pip install -r requirements.txt \ && echo "All installed Python packages:" \ && pip freeze -# % if cookiecutter.crawler_type.startswith('playwright') or cookiecutter.crawler_type.startswith('adaptive-') or cookiecutter.crawler_type == 'stagehand' +# % if uses_playwright_base_image # Pin playwright to the version baked into the base image so it matches the browser binaries. RUN pip install --no-deps --force-reinstall "playwright=={{ playwright_version }}" # % endif