pip install PyVCF
#如果有多个样本,可以用下标访问最后两列(样本列)的信息,例如双样本时,0为normal,1为tumor(具体顺序以VCF表头中样本列的先后顺序为准)
record.samples[0]
对vcf进行筛选,并重新生成vcf
切记:写完之后一定要close()(或者使用 with 语句管理文件),否则缓冲区中的记录可能没有完整写入输出文件
import vcf
# Filter a VCF: keep only PASS records whose tumor-sample allele frequency
# reaches a minimum threshold, and write them to a new VCF file.

# Input and output must be different paths: opening the output with 'w'
# truncates it, which would wipe the input if both names were the same.
filename1 = 'xxx.vcf'
filename2 = 'xxx.filtered.vcf'
tumor_idx = 1   # index of the tumor sample among the sample columns
min_af = 0.01   # minimum allele frequency required to keep a record

# The with-statement flushes and closes both files even if an error occurs.
with open(filename1, 'r') as in_handle, open(filename2, 'w') as out_handle:
    vcf_reader = vcf.Reader(in_handle)
    # The reader is passed as the template so the output reuses its header.
    vcf_writer = vcf.Writer(out_handle, vcf_reader)
    for record in vcf_reader:
        # NOTE(review): PyVCF is expected to give [] for PASS and None for a
        # missing FILTER ('.'); only PASS records are kept, and None is
        # skipped explicitly instead of failing in len().
        if record.FILTER is None or len(record.FILTER) != 0:
            continue
        af = record.samples[tumor_idx]['AF']
        if af is None:
            continue
        # NOTE(review): AF may be a single float or a list with one value per
        # ALT allele (multi-allelic sites) - confirm against the VCF header.
        # The record is kept when any allele reaches the threshold.
        af_values = af if isinstance(af, (list, tuple)) else [af]
        if any(v is not None and v >= min_af for v in af_values):
            vcf_writer.write_record(record)
也可以读取vcf.gz
import vcf
# Read a gzip-compressed VCF and print the main fields of every record.
# Passing the path via `filename=` lets the reader open the file itself.
vcf_name = "xx.vcf.gz"
vcf_reader = vcf.Reader(filename=vcf_name)
for record in vcf_reader:
    print(record.CHROM)   # str
    print(record.POS)     # int
    print(record.REF)     # str
    print(record.ALT)     # list
    print(record.INFO)    # dict
    print(record.FORMAT)  # str