python爬取推特图片_twitter图片视频批量下载

江曦
2023-12-01

import requests

import re

from urllib.request import urlretrieve

import os

import ssl

# SECURITY NOTE: this swaps the ssl module's default HTTPS context factory for
# the unverified one, so HTTPS connections opened through urllib (for example
# the urlretrieve call used for media downloads) skip certificate checks.
# NOTE(review): presumably a workaround for certificate errors behind a
# proxy -- confirm it is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context

# A single requests session reused for the API calls made by getrestid and search.
res=requests.session()

# Proxy mapping passed to requests through ``proxies=``. It is empty by
# default; add entries here if a suitable proxy is available,
# e.g. {'https': 'http://127.0.0.1:7890'}.
prox = {}

# Headers passed with the API requests. All three values are blank
# placeholders and must be filled in with values captured from the network
# traffic of a logged-in browser session.
h = {
    'authorization': '',  # capture from browser traffic
    'x-csrf-token': '',  # capture from browser traffic
    'cookie': '',  # capture from browser traffic
}

def download(file_link, file_name):
    """Download one media file unless it is already on disk.

    Args:
        file_link: URL of the media file to fetch.
        file_name: Destination path. Its parent directory must already exist.

    Returns:
        None. A status line is printed for both outcomes.

    Raises:
        Whatever ``urlretrieve`` or the final rename raises (network errors,
        invalid URL, missing destination directory, ...).
    """
    if os.path.exists(file_name):
        print('文件已存在')
        return

    # Fetch into a sibling ".part" file and rename only on success, so that an
    # interrupted transfer never leaves a truncated file that later runs would
    # mistake for a finished download.
    partial_name = file_name + '.part'
    try:
        urlretrieve(file_link, partial_name)
        os.replace(partial_name, file_name)
    finally:
        # Only still present when the download or the rename failed.
        if os.path.exists(partial_name):
            os.remove(partial_name)
    print('下载完成')

def getrestid(id):
    """Look up the ``rest_id`` of a user and prepare their download folder.

    Creates ``./twitter/<id>`` if it does not exist yet, then queries the
    ``UserByScreenName`` GraphQL endpoint through the shared session.

    Args:
        id: The account's screen name (it is sent as ``screen_name``).

    Returns:
        The ``rest_id`` value found at ``data.user.rest_id`` in the response.

    Raises:
        KeyError: If the response JSON lacks the ``data.user.rest_id`` path
            (for example when the API answers with an error payload).
    """
    import json
    from urllib.parse import quote

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('./twitter/' + id, exist_ok=True)

    # Serialise and percent-encode the query variables instead of splicing the
    # raw name into a pre-encoded string: ordinary names yield the same URL as
    # before, while unusual characters can no longer corrupt the query.
    variables = json.dumps(
        {'screen_name': id, 'withHighlightedLabel': True},
        separators=(',', ':'),
    )
    idurl = (
        'https://api.twitter.com/graphql/ZRnOhhXPwue_JGILb9TNug/'
        'UserByScreenName?variables=' + quote(variables, safe='')
    )
    response = res.get(idurl, headers=h, proxies=prox)
    return response.json()['data']['user']['rest_id']

def _safe_text(text):
    """Return *text* with path separators and control characters replaced by '_'."""
    return re.sub(r'[\\/\x00-\x1f]', '_', text)


def _best_video_url(variants):
    """Return the URL of the highest-bitrate non-HLS variant, or '' if none."""
    best_url = ''
    best_bitrate = -1
    for variant in variants:
        # The m3u8 playlist entry is a stream manifest, not a downloadable file.
        if variant.get('content_type') == 'application/x-mpegURL':
            continue
        # A missing bitrate counts as 0 instead of breaking the comparison.
        bitrate = variant.get('bitrate') or 0
        if bitrate > best_bitrate:
            best_bitrate = bitrate
            best_url = variant['url']
    return best_url


def _tweet_media(tweet):
    """Return the tweet's media list, preferring ``extended_entities``."""
    for key in ('extended_entities', 'entities'):
        media_list = (tweet.get(key) or {}).get('media')
        if media_list:
            return media_list
    return []


def search(id):
    """Download every photo and video from a user's media timeline.

    Resolves the user's ``rest_id`` with ``getrestid``, requests the media
    timeline, and saves each media item under ``./twitter/<id>/``. The file
    name is built from the tweet text (links replaced by '-'), the creation
    time (timezone and year stripped), and the item's position in the tweet.

    Args:
        id: The account's screen name.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        KeyError: If the timeline response lacks ``globalObjects.tweets``.
    """
    rest_id = getrestid(id)  # numeric user id required by the timeline endpoint

    # NOTE(review): the "=1.txt" parameter values are reproduced exactly as
    # found; they look like "=1" garbled by a text replacement -- confirm
    # against a real captured request.
    daurl = 'https://api.twitter.com/2/timeline/media/' + rest_id + '.json?include_profile_interstitial_type=1.txt&include_blocking=1.txt&include_blocked_by=1.txt&include_followed_by=1.txt&include_want_retweets=1.txt&include_mute_edge=1.txt&include_can_dm=1.txt&include_can_media_tag=1.txt&skip_status=1.txt&cards_platform=Web-12&include_cards=1.txt&include_composer_source=true&include_ext_alt_text=true&include_reply_count=1.txt&tweet_mode=extended&include_entities=true&include_user_entities=true&include_ext_media_color=true&include_ext_media_availability=true&send_error_codes=true&simple_quoted_tweets=true&count=3000&ext=mediaStats%2ChighlightedLabel%2CcameraMoment'

    # Use the same proxy settings as the rest_id lookup.
    timeline = res.get(daurl, headers=h, proxies=prox).json()
    tweets = timeline['globalObjects']['tweets']

    for tweet in tweets.values():
        media_list = _tweet_media(tweet)
        if not media_list:
            print('no media')
            continue

        # Tweet text may contain '/' or line breaks, which would otherwise be
        # treated as directories or produce unusable file names.
        text_part = _safe_text(
            re.sub(r'https://\S+', '-', tweet.get('full_text', ''))
        )
        time_part = re.sub(r'\+.*', '-', tweet.get('created_at', ''))
        stem = './twitter/' + id + '/' + text_part + time_part

        for index, media in enumerate(media_list):
            # Handle each item on its own so that one failure does not
            # abort the remaining media of the tweet.
            try:
                if media.get('type') == 'video':
                    media_url = _best_video_url(media['video_info']['variants'])
                    extension = '.mp4'
                else:
                    # Photos, and any other type, are saved from the image URL.
                    media_url = media['media_url_https']
                    extension = '.jpg'
                if not media_url:
                    print('no media')
                    continue
                download(media_url, stem + str(index) + extension)
            except Exception as exc:
                # Best-effort batch: report the failure and move on.
                print('download failed:', exc)

if __name__ == '__main__':
    # Read the target account's screen name from standard input and download
    # its media. The value is not named ``id``, to avoid shadowing the builtin
    # at module level.
    screen_name = input()
    search(screen_name)

 类似资料: