openai
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 4 additions & 1 deletion b/‎.github/workflows/tests.yml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎Makefile‎
Lines changed: 9 additions & 2 deletions b/‎Makefile‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎tests/README.md‎
Lines changed: 3 additions & 0 deletions b/‎tests/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎tests/conftest.py‎
Lines changed: 7 additions & 10 deletions b/‎tests/conftest.py‎
Lines changed: 7 additions & 10 deletions
diff --git a/‎tests/extensions/memory/test_dapr_redis_integration.py‎
Lines changed: 2 additions & 2 deletions b/‎tests/extensions/memory/test_dapr_redis_integration.py‎
Lines changed: 2 additions & 2 deletions
@@ -92,8 +92,11 @@ jobs:
         if: steps.changes.outputs.run == 'true'
         run: make sync
       - name: Run tests with coverage
-        if: steps.changes.outputs.run == 'true'
+        if: steps.changes.outputs.run == 'true' && matrix.python-version == '3.12'
         run: make coverage
+      - name: Run tests
+        if: steps.changes.outputs.run == 'true' && matrix.python-version != '3.12'
+        run: make tests
       - name: Skip tests
         if: steps.changes.outputs.run != 'true'
         run: echo "Skipping tests for non-code changes."
 
@@ -20,8 +20,15 @@ mypy:
 	uv run mypy . --exclude site
 
 .PHONY: tests
-tests: 
-	uv run pytest 
+tests: tests-parallel tests-serial
+
+.PHONY: tests-parallel
+tests-parallel:
+	uv run pytest -n auto --dist loadfile -m "not serial"
+
+.PHONY: tests-serial
+tests-serial:
+	uv run pytest -m serial
 
 .PHONY: coverage
 coverage:
 
@@ -50,6 +50,7 @@ dev = [
     "pytest",
     "pytest-asyncio",
     "pytest-mock>=3.14.0",
+    "pytest-xdist",
     "rich>=13.1.0, <14",
     "mkdocs>=1.6.0",
     "mkdocs-material>=9.6.0",
@@ -145,6 +146,7 @@ filterwarnings = [
 ]
 markers = [
     "allow_call_model_methods: mark test as allowing calls to real model implementations",
+    "serial: mark test as requiring serial execution",
 ]
 
 [tool.inline-snapshot]
 
@@ -8,6 +8,9 @@ Before running any tests, make sure you have `uv` installed (and ideally run `ma
 make tests
 ```
 
+`make tests` first runs every test not marked `serial` in parallel (using `pytest-xdist`),
+and then runs the tests marked `serial` in a separate, non-parallel pass.
+
 ## Snapshots
 
 We use [inline-snapshots](https://15r10nk.github.io/inline-snapshot/latest/) for some tests. If your code adds new snapshot tests or breaks existing ones, you can fix/create them. After fixing/creating snapshots, run `make tests` again to verify the tests pass.
 
@@ -6,16 +6,20 @@
 from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel
 from agents.models.openai_responses import OpenAIResponsesModel
 from agents.run import set_default_agent_runner
-from agents.tracing import set_trace_processors
-from agents.tracing.setup import get_trace_provider
+from agents.tracing.provider import DefaultTraceProvider
+from agents.tracing.setup import set_trace_provider
 
 from .testing_processor import SPAN_PROCESSOR_TESTING
 
 
 # This fixture runs once per session: it installs the test trace provider and shuts it down at the end
 @pytest.fixture(scope="session", autouse=True)
 def setup_span_processor():
-    set_trace_processors([SPAN_PROCESSOR_TESTING])
+    provider = DefaultTraceProvider()
+    provider.set_processors([SPAN_PROCESSOR_TESTING])
+    set_trace_provider(provider)
+    yield
+    provider.shutdown()
 
 
 # Ensure a default OpenAI API key is present for tests that construct clients
@@ -51,13 +55,6 @@ def clear_default_runner():
     set_default_agent_runner(None)
 
 
-# This fixture will run after all tests end
-@pytest.fixture(autouse=True, scope="session")
-def shutdown_trace_provider():
-    yield
-    get_trace_provider().shutdown()
-
-
 @pytest.fixture(autouse=True)
 def disable_real_model_clients(monkeypatch, request):
     # If the test is marked to allow the method call, don't override it.
 
@@ -51,8 +51,8 @@
 from tests.fake_model import FakeModel
 from tests.test_responses import get_text_message
 
-# Mark all tests as async
-pytestmark = pytest.mark.asyncio
+# Docker-backed integration tests should stay on the serial test path.
+pytestmark = [pytest.mark.asyncio, pytest.mark.serial]
 
 
 def wait_for_dapr_health(host: str, port: int, timeout: int = 60) -> bool:
Original file line number	Diff line number	Diff line change
`@@ -50,6 +50,7 @@ dev = [`
`50`	`50`	`"pytest",`
`51`	`51`	`"pytest-asyncio",`
`52`	`52`	`"pytest-mock>=3.14.0",`
	`53`	`+ "pytest-xdist",`
`53`	`54`	`"rich>=13.1.0, <14",`
`54`	`55`	`"mkdocs>=1.6.0",`
`55`	`56`	`"mkdocs-material>=9.6.0",`
`@@ -145,6 +146,7 @@ filterwarnings = [`
`145`	`146`	`]`
`146`	`147`	`markers = [`
`147`	`148`	`"allow_call_model_methods: mark test as allowing calls to real model implementations",`
	`149`	`+ "serial: mark test as requiring serial execution",`
`148`	`150`	`]`
`149`	`151`
`150`	`152`	`[tool.inline-snapshot]`