ref=/home/lzn/WES/genome/resources_broad_hg38_v0_Homo_sapiens_assembly38.fasta
wkd=/home/lzn/WES
mkdir $wkd/vcf
for sample in `cat $wkd/sample.list`
do
samtools mpileup -ugf $ref $wkd/align/$sample.sorted.dedup.recal.bam | bcftools call -vmO v -o $wkd/vcf/$sample.bcf.vcf
done
ref=/home/lzn/WES/genome/resources_broad_hg38_v0_Homo_sapiens_assembly38.fasta
dbsnp=/home/lzn/WES/genome/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf
wkd=/home/lzn/WES
for sample in `cat $wkd/sample.list`
do
gatk HaplotypeCaller \ #此处不能用HaplotypeCallerSpark
-R $ref \
-I $wkd/align/$sample.sorted.dedup.recal.bam \
#--dbsnp $dbsnp \ java.lang.IllegalArgumentException: HaplotypeCallerSpark does not yet support -D or --dbsnp arguments
-O $wkd/vcf/$sample.gatk.vcf \
1>$sample.log 2>&1
done
********************************************************************************
#10:42:25.938 INFO HaplotypeCallerSpark - The output of this tool DOES NOT match the output of HaplotypeCaller.
#10:42:25.938 INFO HaplotypeCallerSpark - It is under development and should not be used for production work.
#10:42:25.938 INFO HaplotypeCallerSpark - For evaluation only.
#10:42:25.938 INFO HaplotypeCallerSpark - Use the non-spark HaplotypeCaller if you care about the results.
********************************************************************************
#GSM2653854 HCC1-Tissue GSM2653855 HCC3-Tissue
#GSM2746362 Healthy1-Tissue
wkd=/home/lzn/WES
ref=/home/lzn/WES/genome/resources_broad_hg38_v0_Homo_sapiens_assembly38.fasta
mkdir $wkd/mutect
gatk --java-options "-Xmx16G -Djava.io.tmpdir=./" \
Mutect2 \
-R $ref \
-I $wkd/align/GSM2653854.sorted.dedup.recal.bam \
-I $wkd/align/GSM2746362.sorted.dedup.recal.bam\
-normal GSM2746362 \
#--panel-of-normals $wkd/mutect/GSM2746362.vcf \ 已经分析得到的vcf文件
--af-of-alleles-not-in-resource 1e-6 \
-O $wkd/mutect/GSM2653854.vcf \
1>$wkd/mutect/log.txt 2>&1
#Starting with v4.0.4.0, GATK recommends the default setting of --af-of-alleles-not-in-resource, which the tool dynamically adjusts for different modes. tumor-only calling sets the default to 5e-8, tumor-normal calling sets it to 1e-6 and mitochondrial mode sets it to 4e-3. For previous versions, the default was 0.001, the average heterozygosity of humans. For other organisms, change --af-of-alleles-not-in-resource to 1/(ploidy*samples in resource
gatk GetPileupSummaries \
-I $wkd/align/GSM2653854.sorted.dedup.recal.bam \
-V $wkd/mutect/GSM2653854.vcf \
-L $wkd/genome/resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.vcf.gz \
-O $wkd/mutect/GSM2653854.pileups.table
#A USER ERROR has occurred: Bad input: Population vcf does not have an allele frequency (AF) info field in its header.
# #CHROM POS ID REF ALT QUAL FILTER INFO
chr6 29942512 . G C 2974860 VQSRTrancheSNP99.80to99.90 AF=0.063
chr6 29942517 . C A 2975860 VQSRTrancheSNP99.80to99.90 AF=0.062
chr6 29942525 . G C 2975600 VQSRTrancheSNP99.60to99.80 AF=0.063
chr6 29942547 rs114945359 G C 15667700 PASS AF=0.077
#生成符合格式的vcf文件
less -Sn resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.vcf.gz |grep -v ^## | sed 's/;/\t/g' | awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6 "\t" $7 "\t" $9}' > resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.biallelic.vcf
gatk GetPileupSummaries \
-I $wkd/align/GSM2653854.sorted.dedup.recal.bam \
-V $wkd/mutect/GSM2653854.vcf \
-L $wkd/genome/resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.biallelic.vcf \
-O $wkd/mutect/GSM2653854.pileups.table
#A USER ERROR has occurred: Couldn't read file /home/lzn/WES/genome/resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.biallelic.vcf. Error was: The file /home/lzn/WES/genome/resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.biallelic.vcf exists, but does not contain Features (ie., is not in a supported Feature file format such as vcf, bcf, bed, or interval_list), and does not have one of the supported interval file extensions
head resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.biallelic.vcf |less -Sn
#CHROM POS ID REF ALT QUAL FILTER
chr1 51479 rs116400033 T A 11726.81 PASS AF=0.3253
chr1 55367 . G A 207.20 PASS AF=0.00117
chr1 55388 . C T 95.61 PASS AF=0.00056
#header缺少INFO
#修改后依然报错
zcat resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.vcf.gz|grep ^\#\# >tmp.txt
cat tmp.txt resources_broad_hg38_v0_1000G_phase1.snps.high_confidence.hg38.biallelic.vcf >>tmp2.vcf
#TribbleException$MalformedFeatureFile: Error parsing LineIteratorImpl(SynchronousLineReader) just after record at: chr1:173904011-173904011, for input source: /home/lzn/WES/genome/tmp2.vcf
#先跳过这部分:Calculate Contamination
#Tools involved: GetPileupSummaries, CalculateContamination
HaplotypeCaller is designed to call germline variants, while Mutect2 is designed to call somatic variants.Neither is appropriate for the other use case.