import requests
from bs4 import BeautifulSoup
import json
import time
import csv
# Endpoint that every search request is sent to.
url = "https://www.google.com/search"

# Query-string parameters sent with each request.  "q" (the search terms) and
# "start" (the result offset) are empty placeholders here; the scraping code
# supplies their real values.  Key order is kept as-is because it determines
# the order of the parameters in the generated URL.
params = {
    "safe": "active",
    "sxsrf": "ALeKk03BfKAgtDobSNR4uteXut6N__y38g:1611033698497",
    "ei": "YmwGYLPyHdXh-AbPzKToCQ",
    "q": "",
    "gs_ssp": "eJzj4tLP1TcwKzcqzjM1YPTizSotLsnMU0jKTE1KLQIAcHYIsw",
    "oq": "justi",
    "gs_lcp": "CgZwc3ktYWIQAxgAMg0ILhCxAxCDARBDEJMCMgoILhCxAxCDARBDMgQILhBDMgcILhCxAxBDMgcIABCxAxBDMgcILhCxAxBDMgQIABBDMggILhCxAxCDATICCAAyBQgAELEDOgUIABCRAjoICAAQsQMQgwE6BQguELEDUOqpBljaugZgjMsGaABwAngBgAGXBYgBvRKSAQkyLTEuMC4yLjKYAQCgAQGqAQdnd3Mtd2l6wAEB",
    "sclient": "psy-ab",
    "start": "",
    "sa": "N",
    "ved": "2ahUKEwjf-LW_t6fuAhWFd94KHXqXBo0Q8tMDegQIVxA2",
    "biw": "876",
    "bih": "900",
    "dpr": "1.5",
}

# HTTP request headers sent with each request.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
    "cache-control": "no-cache",
    # NOTE(review): this looks like a cookie header captured from a real
    # browser session -- confirm, and consider loading it from the environment
    # instead of keeping it in source control.  The value is written as
    # adjacent string literals (one cookie per line) that Python joins into a
    # single "; "-separated string.
    "cookie": (
        "CGIC=IocBdGV4dC9odG1sLGFwcGxpY2F0aW9uL3hodG1sK3htbCxhcHBsaWNhdGlvbi94bWw7cT0wLjksaW1hZ2UvYXZpZixpbWFnZS93ZWJwLGltYWdlL2FwbmcsKi8qO3E9MC44LGFwcGxpY2F0aW9uL3NpZ25lZC1leGNoYW5nZTt2PWIzO3E9MC45; "
        "HSID=AtL1Fuks6GN88NgIF; "
        "SSID=A5G1V1zzIEPkl30hy; "
        "APISID=R62Dg908smzBc75_/AE6wtPR0lHi3PNcyr; "
        "SAPISID=DhY-UzCNe22FLTow/AjmaKK4_-XFREXtVX; "
        "__Secure-3PAPISID=DhY-UzCNe22FLTow/AjmaKK4_-XFREXtVX; "
        "SID=5AcO7y9MYqK1JBYbOB9T7xeWcJwKQjOtRbbCh60AFgdUo6QyiT-wrT0furAG-H4tCk87hA.; "
        "__Secure-3PSID=5AcO7y9MYqK1JBYbOB9T7xeWcJwKQjOtRbbCh60AFgdUo6Qyoq7YhstvGbLeLKfl7S3HHw.; "
        "OTZ=5796681_24_24__24_; "
        "SEARCH_SAMESITE=CgQI0ZEB; "
        "ANID=AHWqTUnyp6Ge1xtQ_TL0NAZmTvouupte3kUSnVW6oKAZd5CJZEL6eTEKr8Dvuy3J; "
        "1P_JAR=2021-01-19-03; "
        "NID=207=QJ3H1_PEEqH87e2HJ9-LYqdl8T4kq3B7Ybxa6cnWTvf6FOu5kuFPIMN6sjPTa6uGQQgd_ILLwdgrFexdNcG1edmOLPwamevgB8wWBZTt8zDQ0C1qGnoO-0HT4-DD2bHyIv4mYZhQXkQwYYY1YlC1woUo4hIJbB6fI9shEBm_UIAElqTHRWjiudjgWc_VM69_cCacl4muQHElSfs-ok-7L6w0kp8-3pl6A8YVtdhoev3ms0LXWcOyTpmRP9vhrHNKBuZpSeRAuCGLI2PLSqtQspyzHMf3A18sqZIUM2hCgIhExcjR1UKwRrn7ikGllVwFqFSJDXAD9A; "
        "SIDCC=AJi4QfEXFZmOSCUgZxKqFr4vUETalX_wQAJOEnDTUtPaFDY2oUOdAha_UIyodIM1esXg72G-8w; "
        "__Secure-3PSIDCC=AJi4QfHq0q1H0qhjNe1pbyTuSEzm0nH3jNZD2QTrv-4yP_7QVwZ28wEHI0d7YL1SGNRlMlF1ow"
    ),
    "pragma": "no-cache",
    "referer": "https://www.google.com/",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
    "x-client-data": "CIi2yQEIpLbJAQjBtskBCKmdygEIx8LKAQisx8oBCLTLygEIpM3KAQjAz8oBCNzVygEIlJrLAQjNmssBCNScywEIqZ3LAQiqncsBCK6dywEY+bjKARiqm8sB",
}
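# NOTE: the 'accept-encoding': 'gzip, deflate, br' header is intentionally not part of `headers`
# (presumably so the server only replies with encodings the HTTP client can decode -- TODO confirm).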
# Column order of the exported CSV; fixed up front so the header can be written
# even when no results were scraped.
CSV_FIELDS = ('Title', 'Link', 'Description')
RESULTS_PER_PAGE = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get() may block forever


def to_ascii(text):
    """Return *text* as a ``str`` with every non-ASCII character dropped.

    The encode/decode round trip keeps the result a ``str``; storing the
    intermediate ``bytes`` would end up in the CSV as ``b'...'`` reprs.
    """
    return text.encode('ascii', 'ignore').decode('ascii')


def parse_results(html):
    """Extract the search results from one result page.

    Args:
        html: Markup of a result page.

    Returns:
        A list of dicts with the keys ``Title``, ``Link`` and ``Description``.
        Result blocks without a title or a link are skipped; a missing
        description becomes an empty string.
    """
    soup = BeautifulSoup(html, 'lxml')
    items = []
    # class_= is the documented way to filter on CSS class; a set such as
    # {'class', 'tF2Cxc'} only works by accident and also matches class="class".
    for block in soup.find_all('div', class_='tF2Cxc'):
        title = block.find('h3', class_='LC20lb DKV0Md')
        link_box = block.find('div', class_='yuRUbf')
        anchor = link_box.find('a', href=True) if link_box is not None else None
        if title is None or anchor is None:
            continue
        snippet = block.find('span', class_='aCOpRe')
        items.append({
            'Title': to_ascii(title.text),
            'Link': anchor['href'],
            'Description': to_ascii(snippet.text) if snippet is not None else '',
        })
    return items


def scrape(query, pages=5, delay=3):
    """Fetch *pages* consecutive result pages for *query* and parse them.

    Args:
        query: Search terms sent as the ``q`` parameter.
        pages: Number of result pages to request.
        delay: Seconds to wait between two consecutive requests.

    Returns:
        The parsed results of all pages, in page order.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    results = []
    for page in range(pages):
        # Build a per-request copy rather than mutating the shared params
        # dict; existing keys keep their position, so the URL is unchanged.
        page_params = dict(params, q=query, start=str(page * RESULTS_PER_PAGE))
        response = requests.get(url, params=page_params, headers=headers,
                                timeout=REQUEST_TIMEOUT)
        print('Get Response from : %s | Status Code : %s' % (response.url, response.status_code))
        if response.ok:
            results.extend(parse_results(response.text))
        if page < pages - 1:
            time.sleep(delay)  # pause between requests, not after the last one
    return results


def export_csv(results, path='JBscrape.csv'):
    """Write *results* to *path* as CSV with a header row.

    Args:
        results: Iterable of dicts keyed by ``CSV_FIELDS``.
        path: Destination file; it is overwritten if it exists.
    """
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=CSV_FIELDS)
        writer.writeheader()
        writer.writerows(results)


def main():
    """Scrape the results for 'justin bieber' and export them to JBscrape.csv."""
    results = scrape('justin bieber')
    export_csv(results, 'JBscrape.csv')
    print('Exported results to "JBscrape.csv" file')


if __name__ == '__main__':
    main()