|
5 | 5 | import asyncio |
6 | 6 | import base64 |
7 | 7 | import sys |
| 8 | +from collections.abc import AsyncIterator |
| 9 | +from contextlib import asynccontextmanager |
8 | 10 | from typing import Any, Literal |
9 | 11 |
|
10 | 12 | from playwright.async_api import Browser, Page, Playwright, async_playwright |
@@ -118,46 +120,77 @@ async def screenshot(self) -> str: |
118 | 120 | png_bytes = await self.page.screenshot(full_page=False) |
119 | 121 | return base64.b64encode(png_bytes).decode("utf-8") |
120 | 122 |
|
121 | | - async def click(self, x: int, y: int, button: Button = "left") -> None: |
def _normalize_keys(self, keys: list[str] | None) -> list[str]:
    """Translate CUA key names into the key names used for Playwright.

    Each name is looked up in ``CUA_KEY_TO_PLAYWRIGHT_KEY`` by its
    lower-cased form; names without a mapping are passed through unchanged.

    Args:
        keys: Key names to translate, or ``None``.

    Returns:
        The translated key names in their original order. An empty list is
        returned when ``keys`` is ``None`` or empty.
    """
    normalized: list[str] = []
    for name in keys or []:
        normalized.append(CUA_KEY_TO_PLAYWRIGHT_KEY.get(name.lower(), name))
    return normalized
| 127 | + |
| 128 | + @asynccontextmanager |
| 129 | + async def _hold_keys(self, keys: list[str] | None) -> AsyncIterator[None]: |
| 130 | + mapped_keys = self._normalize_keys(keys) |
| 131 | + try: |
| 132 | + for key in mapped_keys: |
| 133 | + await self.page.keyboard.down(key) |
| 134 | + yield |
| 135 | + finally: |
| 136 | + for key in reversed(mapped_keys): |
| 137 | + await self.page.keyboard.up(key) |
| 138 | + |
async def click(
    self, x: int, y: int, button: Button = "left", *, keys: list[str] | None = None
) -> None:
    """Click at page coordinates ``(x, y)``, optionally holding keys.

    Args:
        x: Horizontal coordinate passed to ``page.mouse.click``.
        y: Vertical coordinate passed to ``page.mouse.click``.
        button: Requested mouse button. Anything other than ``"left"``,
            ``"right"`` or ``"middle"`` is clicked as ``"left"``.
        keys: Keys held down (via ``_hold_keys``) while the click happens.
    """
    # Playwright only supports left, middle, right buttons; fall back to left.
    is_supported = button in ("left", "right", "middle")
    playwright_button: Literal["left", "middle", "right"]
    playwright_button = button if is_supported else "left"  # type: ignore

    async with self._hold_keys(keys):
        await self.page.mouse.click(x, y, button=playwright_button)
129 | 150 |
|
async def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None:
    """Double-click at page coordinates ``(x, y)``, optionally holding keys.

    Args:
        x: Horizontal coordinate passed to ``page.mouse.dblclick``.
        y: Vertical coordinate passed to ``page.mouse.dblclick``.
        keys: Keys held down (via ``_hold_keys``) during the double-click.
    """
    held = self._hold_keys(keys)
    async with held:
        await self.page.mouse.dblclick(x, y)
132 | 154 |
|
133 | | - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: |
134 | | - await self.page.mouse.move(x, y) |
135 | | - await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") |
| 155 | + async def scroll( |
| 156 | + self, |
| 157 | + x: int, |
| 158 | + y: int, |
| 159 | + scroll_x: int, |
| 160 | + scroll_y: int, |
| 161 | + *, |
| 162 | + keys: list[str] | None = None, |
| 163 | + ) -> None: |
| 164 | + async with self._hold_keys(keys): |
| 165 | + await self.page.mouse.move(x, y) |
| 166 | + await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") |
136 | 167 |
|
async def type(self, text: str) -> None:
    """Type ``text`` through the page's keyboard.

    Args:
        text: The text forwarded to ``page.keyboard.type``.
    """
    keyboard = self.page.keyboard
    await keyboard.type(text)
139 | 170 |
|
async def wait(self) -> None:
    """Pause for one second without blocking the event loop."""
    pause_seconds = 1
    await asyncio.sleep(pause_seconds)
142 | 173 |
|
async def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None:
    """Move the pointer to page coordinates ``(x, y)``, optionally holding keys.

    Args:
        x: Horizontal coordinate passed to ``page.mouse.move``.
        y: Vertical coordinate passed to ``page.mouse.move``.
        keys: Keys held down (via ``_hold_keys``) while the pointer moves.
    """
    held = self._hold_keys(keys)
    async with held:
        await self.page.mouse.move(x, y)
145 | 177 |
|
async def keypress(self, keys: list[str]) -> None:
    """Press the given keys together as a chord, then release them.

    Keys are normalized with ``_normalize_keys``, pressed in order, and
    released in reverse order. The release happens in a ``finally`` block and
    covers only the keys that were actually pressed, so a failure while
    pressing one key does not leave the earlier keys stuck down.

    Args:
        keys: Key names making up the chord.
    """
    pressed: list[str] = []
    try:
        for key in self._normalize_keys(keys):
            await self.page.keyboard.down(key)
            # Record only after a successful press so cleanup mirrors reality.
            pressed.append(key)
    finally:
        for key in reversed(pressed):
            await self.page.keyboard.up(key)
152 | 184 |
|
153 | | - async def drag(self, path: list[tuple[int, int]]) -> None: |
| 185 | + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: |
154 | 186 | if not path: |
155 | 187 | return |
156 | | - await self.page.mouse.move(path[0][0], path[0][1]) |
157 | | - await self.page.mouse.down() |
158 | | - for px, py in path[1:]: |
159 | | - await self.page.mouse.move(px, py) |
160 | | - await self.page.mouse.up() |
| 188 | + async with self._hold_keys(keys): |
| 189 | + await self.page.mouse.move(path[0][0], path[0][1]) |
| 190 | + await self.page.mouse.down() |
| 191 | + for px, py in path[1:]: |
| 192 | + await self.page.mouse.move(px, py) |
| 193 | + await self.page.mouse.up() |
161 | 194 |
|
162 | 195 |
|
163 | 196 | async def run_agent( |
|
0 commit comments