JavaScript Object Notation (JSON)
JSON格式的字符串必须使用双引号(""),否则解析时会报错
import json

# Read data.json as text and parse it into Python objects.
# The file is opened as UTF-8 so non-ASCII content (e.g. Chinese) decodes
# the same way on every platform.
with open('data.json', 'r', encoding='utf-8') as testfile:
    text = testfile.read()  # file objects expose read(); the json module has no read()
    data = json.loads(text)

# NOTE(review): indexing with [0] assumes the top-level JSON value is an
# array -- confirm against the actual contents of data.json.
print(data[0])
import json

# Sample JSON document: an array of two records. JSON strings must use
# double quotes, so the text is kept in a triple-quoted Python string.
json_text = """
[{
"name":"bob",
"gender":"male",
"birthday": "1992-10-18"
},{
"name":"小明",
"gender":"男",
"birthday" : "1995-11-23"
}]
"""

# loads() parses the JSON text into Python objects (here: a list of dicts).
data = json.loads(json_text)

# dumps() serializes the Python objects back to a str; indent=2 pretty-prints
# with two-space indentation. ensure_ascii=False together with
# encoding='utf-8' writes Chinese characters as-is instead of \uXXXX escapes.
with open('data.json', 'w', encoding='utf-8') as testfile:
    testfile.write(json.dumps(data, indent=2, ensure_ascii=False))
逗号分隔值CSV
import csv

# Print every row of data.csv; csv.reader yields each row as a list of strings.
# newline='' lets the csv module handle line endings itself, so quoted fields
# containing embedded newlines are parsed correctly.
with open('data.csv', 'r', encoding='utf-8', newline='') as testfile:
    reader = csv.reader(testfile)
    for row in reader:
        print(row)
import csv

# Write a header row followed by three data rows to data.csv.
# newline='' prevents extra blank lines between rows on Windows, because the
# csv writer emits its own '\r\n' line terminator.
with open('data.csv', 'w', encoding='utf-8', newline='') as testfile:
    writer = csv.writer(testfile)
    writer.writerow(['id', 'name', 'age'])  # header row
    writer.writerow(['10001', 'Mike', '21'])  # writerow() writes a single row
    # writerows() writes several rows in one call
    writer.writerows([['10002', 'Bob', '19'], ['10003', 'Young', '20']])
但是一般情况爬虫都是结构化爬取数据,一般用字典表示
import csv

# Write structured records (dicts) to data.csv using DictWriter.
# newline='' prevents extra blank lines between rows on Windows, because the
# csv writer emits its own '\r\n' line terminator.
with open('data.csv', 'w', encoding='utf-8', newline='') as testfile:
    fieldnames = ['id', 'name', 'age']  # column order of the header and rows
    writer = csv.DictWriter(testfile, fieldnames=fieldnames)
    writer.writeheader()  # write the header row built from fieldnames
    # DictWriter rows must be mappings keyed by the field names, not lists.
    writer.writerow({'id': '10001', 'name': 'Mike', 'age': '21'})
    writer.writerows([
        {'id': '10002', 'name': 'Bob', 'age': '19'},
        {'id': '10003', 'name': 'Young', 'age': '20'},
    ])
若要将数据追加写入文件,将打开模式 'w' 改为 'a' 即可