(1)安装Playwright依赖库(Playwright支持async/await语法,故需要Python 3.7+)
pip install playwright
(2)安装Chromium、Firefox、WebKit等浏览器的驱动文件(内置浏览器)
python -m playwright install
(3)录屏,自动生成代码
python -m playwright codegen
以xingzheai.cn为起始页开始录制
python -m playwright codegen https://xingzheai.cn/
打开xingzheai.cn,用Chromium驱动,将结果保存为my.py的python文件
python -m playwright codegen --target python -o 'my.py' -b chromium https://xingzheai.cn/
--target:规定生成脚本的语言(如javascript、python、python-async等),默认为python
-b:指定浏览器驱动
-o:将录制的脚本保存到一个文件
(4)常见的定位方法
# Common selector forms. `page` is an already-opened Playwright Page.

# Locate by the data-test-id attribute.
page.click("data-test-id=login")

# CSS and XPath selectors (the engine is detected from the selector text).
page.click('div')
page.click('//html/body/div')

# The same selectors with the engine named explicitly.
page.click('css=div')
page.click('xpath=//html/body/div')

# Click the element whose text is "Sign Up" inside #free-month-promo.
page.click('#free-month-promo >> text=Sign Up')

# fill() needs the text to type as its second argument.
page.fill('css=[placeholder="Search GitHub"]', 'playwright')
page.fill('[placeholder="Search GitHub"]', 'playwright')  # shorthand

# Locate an element by its text.
page.click('text="Login"')
page.click('"Login"')  # shorthand

# Read the text content of an element, two equivalent ways.
print(page.eval_on_selector('.headerLogo', """e => e.textContent"""))
print(page.query_selector('.headerLogo').text_content())

# CSS extension: the :visible pseudo-class.
# Click the first button in the page.
page.click('button')
# Click the first *visible* button; hidden ones are ignored.
page.click('button:visible')
(5)截屏
# Save a screenshot of the current page; the file name embeds the browser name.
screenshot_file = f'example-{browser_type.name}.png'
page.screenshot(path=screenshot_file)
(6)同一个浏览器启动多个page,可以模拟多页面的场景
from playwright.sync_api import sync_playwright

# Open two pages inside one browser context and screenshot each of them.
with sync_playwright() as p:
    browser_type = p.chromium
    browser = browser_type.launch(headless=False)
    # Pages created from the same context belong to one browser session.
    context = browser.new_context()

    page1 = context.new_page()
    page1.goto('https://mail.163.com/')
    page1.screenshot(path=f'page1-{browser_type.name}.png')

    page2 = context.new_page()
    page2.goto("https://www.baidu.com/")
    page2.screenshot(path=f'page2-{browser_type.name}.png')

    context.close()
    browser.close()
(7)处理frame,查找frame有三种方法:
from playwright.sync_api import sync_playwright

# Fill in and submit a login form that lives inside an iframe.
with sync_playwright() as p:
    browser_type = p.chromium
    browser = browser_type.launch(headless=False)
    page = browser.new_page()
    page.goto('https://mail.163.com/')

    # A frame can be found by selector, by name, or by URL.
    # By selector: take the <iframe> element handle, then its content frame.
    login_frame = page.query_selector("[id^='x-URS-iframe']").content_frame()
    # login_frame = page.query_selector("#loginDiv>iframe").content_frame()
    # By name / URL: page.frame() already returns the Frame itself.
    # login_frame2 = page.frame(name="name")
    # login_frame3 = page.frame(url="URL")

    # List every frame attached to the page.
    print(page.frames)

    login_frame.fill("input[name='email']", "test123")
    login_frame.fill("input[name='password']", "1234")
    login_frame.click("#dologin")
    page.screenshot(path=f'example-{browser_type.name}.png')
    browser.close()
实践实例:
from playwright.sync_api import Playwright, sync_playwright, expect


def run(playwright: Playwright) -> None:
    """Search Baidu for a keyword, open a result in a popup, then clean up.

    Args:
        playwright: The Playwright instance yielded by ``sync_playwright()``.
    """
    # Launch a visible Chromium instance and create a browser context.
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()

    # Open a new page and load the search site.
    page = context.new_page()
    page.goto("https://www.baidu.com/")

    # Type the keyword into the search box and press the search button.
    page.fill('//*[@id="kw"]', "快乐")
    page.click('//*[@id="su"]')

    # Clicking the result opens a new window; capture it as a popup.
    with page.expect_popup() as popup_info:
        page.locator("text=快乐(汉语词语) - 百度百科").click()
    # The popup page is only available through popup_info.value; without
    # this assignment `page1` would be undefined below.
    page1 = popup_info.value

    # Close the popup, then the original page.
    page1.close()
    page.close()

    # ---------------------
    context.close()
    browser.close()


with sync_playwright() as playwright:
    run(playwright)
异步的案例
异步执行,示例如下:
# Run the same steps in Chromium, Firefox and WebKit with the async API.
# The browsers are driven one after another inside a single event loop.
import asyncio
from playwright.async_api import async_playwright


async def main():
    """Open the site in each browser engine, wait for a text, and screenshot it."""
    async with async_playwright() as p:
        for browser_type in [p.chromium, p.firefox, p.webkit]:
            browser = await browser_type.launch()
            page = await browser.new_page()
            await page.goto('https://xingzheai.cn/')
            # Wait until the expected text is rendered before capturing.
            await page.wait_for_selector("text=智能内容审核")
            await page.screenshot(path=f'example-{browser_type.name}.png')
            await browser.close()


# asyncio.run creates the event loop, runs main() and closes the loop.
asyncio.run(main())