当前位置: 首页 > 工具软件 > pyquery-ql > 使用案例 >

pyquery用法详解

左丘子平
2023-12-01

PyQuery 库也是一个非常强大又灵活的网页解析库。如果你有前端开发经验,应该接触过 jQuery,那么 PyQuery 就是你的绝佳选择:PyQuery 是 Python 仿照 jQuery 的严格实现,语法与 jQuery 几乎完全相同,所以不用再费心去记一些奇怪的方法了。
官网地址:http://pyquery.readthedocs.io/en/latest/
pyquery的安装,可参考博客:http://blog.csdn.net/qq_29186489/article/details/78581249
pyquery的初始化
初始化方式:字符串初始化、URL初始化、文件初始化

from pyquery import PyQuery as pq

# Sample markup used for the string-based initialization.
html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

# Initialize from an HTML string.
string_doc = pq(html)
print(string_doc("li"))

# Initialize from a URL (needs network access).
url_doc = pq(url="http://www.baidu.com")
print(url_doc("head"))

# Initialize from a file (expects demo.html in the working directory).
file_doc = pq(filename="demo.html")
print(file_doc("li"))

基本的CSS选择器

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

# Query the parsed document with a class selector and print the matches.
document = pq(html)
item1_matches = document(".item-1")
print(item1_matches)

子元素

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)

# Start from the element with class "list" and show what a query returns.
list_node = document(".list")
print(type(list_node))
print(list_node)

# find() with a selector, starting from the list node.
item1_nodes = list_node.find(".item-1")
print(type(item1_nodes))
print(item1_nodes)

# children() without an argument.
all_children = list_node.children()
print(all_children)

# children() narrowed by a selector.
item0_children = list_node.children(".item-0")
print(item0_children)

父元素

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)
item0_nodes = document(".item-0")

# parent(): print the result type and the result itself.
direct_parent = item0_nodes.parent()
print(type(direct_parent))
print(direct_parent)

# parents() without an argument.
all_ancestors = item0_nodes.parents()
print(all_ancestors)

# parents() narrowed by a selector.
list_ancestors = item0_nodes.parents(".list")
print(list_ancestors)

兄弟元素

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)

# Select the element that carries both the "item-0" and "active" classes.
active_item = document(".item-0.active")

# siblings() without an argument: print the result type and the result.
all_siblings = active_item.siblings()
print(type(all_siblings))
print(all_siblings)

# siblings() narrowed by a selector.
active_siblings = active_item.siblings(".active")
print(active_siblings)

遍历元素

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)

# Walk the ".item-0" matches one at a time via items() and print each.
for entry in document(".item-0").items():
    print(entry)

获取信息
获取属性信息

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)
active_link = document(".item-0.active a")

# Read href through the attr() call form, then the attr.<name> form.
href_via_call = active_link.attr("href")
print(href_via_call)
href_via_attribute = active_link.attr.href
print(href_via_attribute)

获取文本

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)

# Print the text() of the link selected by ".item-0.active a".
active_link = document(".item-0.active a")
link_text = active_link.text()
print(link_text)

获取HTML

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)

# Print the html() of the link selected by ".item-0.active a".
active_link = document(".item-0.active a")
link_markup = active_link.html()
print(link_markup)

DOM操作
移除类和增加类:removeClass、addClass

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)
active_item = document(".item-0.active")

# Print the node before any change, after removeClass, and after addClass.
print(active_item)

active_item.removeClass("active")
print(active_item)

active_item.addClass("active")
print(active_item)

attr、css

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)
active_item = document(".item-0.active")

# Print the node as parsed, then again after each two-argument call.
print(active_item)

active_item.attr("name", "link")
print(active_item)

active_item.css("font-size", "14px")
print(active_item)

remove

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    hello world!
    <p>This is a paragram</p>
</div>
'''

document = pq(html)
container = document("#container")

# Print the container text before and after calling remove() on its <p>.
print(container.text())

paragraph = container.find("p")
paragraph.remove()
print(container.text())

伪类选择器

# -*- coding: utf-8 -*-
from pyquery import PyQuery as pq

html = '''
<div id="container">
    <ul class="list">
        <li class="item-0">first item</li>
        <li class="item-1"><a href="link2.html">second item</a></li>
        <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
        <li class="item-1 active"><a href="link4.html">fourth item</a></li>
        <li class="item-0"><a href="link5.html">fifth item</a></li>
    </ul>
</div>
'''

document = pq(html)

# The first element.
print(document("li:first-child"))

# The last element.
print(document("li:last-child"))

# The second element.
print(document("li:nth-child(2)"))

# Elements whose index is greater than 2.
print(document("li:gt(2)"))

# Elements that contain the given text.
print(document("li:contains(fifth)"))
 类似资料: