爬虫 —— requests

谢哲瀚
2023-12-01

requests

  1. 文本处理
# Step 1: send the request.
# Options commonly added to a request:
#   1. browser disguise (user-agent header)
#   2. password-free login (cookie header)
#   3. proxy configuration (proxies)
import requests

# Plain request with no custom headers. A timeout is passed so the call
# cannot block indefinitely if the server never answers.
response = requests.get('https://cd.zu.ke.com/zufang', timeout=10)

# Browser disguise: send a desktop-browser user-agent plus a session cookie.
# NOTE(review): the cookie below is a hard-coded session value copied from a
# browser; it will expire and should not be committed to shared code.
header = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    'cookie': 'bid=9nWnomnWgaU; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1679904382%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DKot0lC8m97FdQ2KnRpGNoiZ7ZV5xpt1nDt0XjpnJQJf-k0Ovkm_0LTAnYdmoP6FEFdu8a6GzJ-G1EW9YLuK3W_%26wd%3D%26eqid%3Dbc5f5d53000032000000000664214e79%22%5D; _pk_id.100001.4cf6=e67f89f04105dfd3.1679904382.1.1679904382.1679904382.; _pk_ses.100001.4cf6=*; ap_v=0,6.0; __utma=30149280.674265592.1679904384.1679904384.1679904384.1; __utmb=30149280.0.10.1679904384; __utmc=30149280; __utmz=30149280.1679904384.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utma=223695111.378200773.1679904384.1679904384.1679904384.1; __utmb=223695111.0.10.1679904384; __utmc=223695111; __utmz=223695111.1679904384.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic',
}
response = requests.get('https://cd.zu.ke.com/zufang', headers=header, timeout=10)
# Set the encoding used when the response body is decoded to text.
response.encoding = 'utf-8'
  1. 图片处理

    # Fetch the resource and keep the raw, undecoded response body.
    # A timeout is passed so the call cannot block indefinitely.
    image1 = requests.get(
        'https://www.baidu.com/link?url=W3iQvu2mOEvA5FkDMzj_opHo2EECjYrwymrwUXn9SLEQodBZeQl2ZUyVtO2x2Q1k7cznnHVwzQbQ51JjGtyPDxB0-k2wiCZj_OkMSF6Q8sK1kaRI7Bn4hpEaP3v254xZLPBdpBe1XLRik6FHqNSHKDCpGLePjppwjj7XlEnOE4Rni0ngPEExQ16GTf2-JpV_gcJQceY_gYOfn12ag6f01ik5F-KCKocnan2BerDFXUA1Je3Y8i3CetmFFTrHjwYqw2QIix9035rZlat-pdsKOhefRZ4yJHVTvYJzLO1ECTjxI4JYNpp7Opt5m1IX4RIf7HSna9C3m6U2ZWNDO2cmdCqE5pkFgS7wYl69BInx1zfP8PdgDLaoAT9s5AyVFHQyTdb_no6ergknM61a4D_lrcpLovRmqHHmkG6rC-7sAC5un4Jf4qEYh8MujOeKjWDH22CkYjNiGaayZHtINZA9e28tuBF4I8Z-2sU4ZM2ocLRyh5lIcBVIf7OyBuusM1AhNoX7ZVN2knKEeCP-6EFcYX9u0iHUMUdGcNVhuryb72RUKFo9aNWw-cGJC1vm9V4YA-RaP1yvnO0w_FXsOJdYzXYW8E6LL_uAC7Lu-S464dxeunmiOenfGW75ewkbN4E6O84AO2UREXmfIRcntq3ZH_VnxWoaQVf1X8lHN4fyYyG&wd=&eqid=be00fc29000389170000000664214705',
        timeout=10)
    result = image1.content
    # Write the bytes in binary mode so they are stored unmodified.
    # NOTE(review): the original notes left the `with` statement unfinished;
    # the file name and extension here are assumptions -- adjust them to the
    # actual content type of the downloaded resource.
    with open('image1.jpg', 'wb') as f:
        f.write(result)
    # Kept from the original notes: prints an empty line.
    print()
    
  2. JSON 接口

    # Request the hero-list endpoint, decode the body as JSON, and print it.
    hero_list_url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2799844'
    hero = requests.get(hero_list_url)
    hero_data = hero.json()
    print(hero_data)
    
 类似资料: