# Samples: GSM2653854 = HCC1-Tissue, GSM2653855 = HCC3-Tissue
#          GSM2746362 = Healthy1-Tissue
# Data download.
# Runs one `prefetch -p` per line of SRR_Acc_List.txt (read from the current
# directory). The whole job is put in the background with `&`, so the shell
# returns immediately; use `wait` or `jobs` before relying on the downloads.
xargs -I {} prefetch -p {} < SRR_Acc_List.txt &
# Put the files of each sample into a folder named after the sample.
# Original note kept from the author: /home/lzn/WES/rawdata/
#
# Step 1: for every entry under ~/wendang/WES/rawdata/ whose name starts with
# "GSM", create a directory of the same name in the current directory.
# The pattern is quoted so the shell cannot glob-expand it against existing
# GSM* directories, and `mkdir -p` keeps the step re-runnable.
ls ~/wendang/WES/rawdata/ | grep '^GSM' | xargs -I {} mkdir -p {}
# Step 2: record the GSM* entries of the current directory, one per line.
ls | grep '^GSM' > sample.list
# Show the resulting list for a visual check.
cat sample.list
# Expected output of `cat sample.list`:
#   GSM2653854
#   GSM2653855
#   GSM2746362
# Convert SRA files to FASTQ files.
# For every sample named in $wkd/sample.list:
#   1. create $wkd/rawdata/<sample> (no error if it already exists),
#   2. symlink everything from ~/wendang/WES/rawdata/<sample>/ into it,
#   3. run `fastq-dump --split-e` on each entry of that directory, writing
#      the output into the same directory.
# `wkd` is exported because the later steps of this file read it as well.
export wkd=/home/lzn/WES
for sample in $(cat "$wkd/sample.list"); do
  sample_dir="$wkd/rawdata/$sample"
  mkdir -p "$sample_dir"
  # Skip the sample if its directory cannot be entered; otherwise the
  # commands below would run against whatever directory we happen to be in.
  cd "$sample_dir" || { echo "cannot cd to $sample_dir, skipping" >&2; continue; }
  echo "$sample_dir"
  ln -s ~/wendang/WES/rawdata/"$sample"/* "$sample_dir"
  ls ./ | xargs -I {} fastq-dump --split-e -O ./ {}
done
# Merge the data of each sample.
# For every sample named in $wkd/sample.list, concatenate all *_1.fastq and
# all *_2.fastq files found in $wkd/rawdata/<sample>/ into
# $wkd/rawdata/<sample>_1.fastq and $wkd/rawdata/<sample>_2.fastq, then
# remove the per-sample directory. The directory is only removed when both
# concatenations succeeded, so a failed merge does not discard its inputs.
# Requires: wkd (exported by the SRA-to-FASTQ step).
: "${wkd:?wkd must be set}"   # an empty wkd would turn the rm below into /rawdata/...
for sample in $(cat "$wkd/sample.list"); do
  sample_dir="$wkd/rawdata/$sample"
  cd "$sample_dir" || { echo "cannot cd to $sample_dir, skipping" >&2; continue; }
  # Progress output: working root and the first merged file being written.
  echo "$wkd"
  echo "$wkd/rawdata/${sample}_1.fastq"
  if cat ./*_1.fastq > "$wkd/rawdata/${sample}_1.fastq" \
    && cat ./*_2.fastq > "$wkd/rawdata/${sample}_2.fastq"; then
    merged=yes
  else
    merged=no
  fi
  # Leave the sample directory before deleting it.
  cd "$wkd" || break
  if [ "$merged" = yes ]; then
    rm -r -- "$sample_dir"
  else
    echo "merge failed for $sample; keeping $sample_dir" >&2
  fi
done
# First extract a small sequencing data set to practise on.
# For every sample in $wkd/sample.list, copy the first 8,000,000 lines of
# each merged FASTQ file from $wkd/rawdata/ into $wkd/cleandata/
# (2,000,000 reads, assuming the usual 4-line FASTQ records).
# Requires: wkd (exported by the SRA-to-FASTQ step).
: "${wkd:?wkd must be set}"
# The redirections below fail if the output directory is missing.
mkdir -p "$wkd/cleandata"
for sample in $(cat "$wkd/sample.list"); do
  for mate in 1 2; do
    head -n 8000000 "$wkd/rawdata/${sample}_${mate}.fastq" \
      > "$wkd/cleandata/${sample}_${mate}.fastq"
  done
done
# Reference files can be browsed at:
# https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0
# GENOME
# Download the hg38 reference FASTA, its .fai index and its sequence
# dictionary into the current directory.
# The public endpoint storage.googleapis.com is used on purpose:
# storage.cloud.google.com is the cookie-authenticated browser endpoint and
# hands wget a sign-in redirect instead of the file.
for ref_file in \
  Homo_sapiens_assembly38.fasta \
  Homo_sapiens_assembly38.fasta.fai \
  Homo_sapiens_assembly38.dict; do
  wget "https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/${ref_file}"
done
# BWA index
# Download the prebuilt index files (.64.amb/.ann/.bwt/.pac/.sa) for
# Homo_sapiens_assembly38.fasta into /home/lzn/WES/genome/index.
# The directory is created if needed and the downloads are skipped when it
# cannot be entered, so files never land in an unrelated directory.
# The shell stays in that directory afterwards, as before.
# storage.googleapis.com is the public download endpoint;
# storage.cloud.google.com requires a browser sign-in and does not work
# with wget.
mkdir -p /home/lzn/WES/genome/index \
  && cd /home/lzn/WES/genome/index \
  && for idx_ext in amb ann bwt pac sa; do
    wget "https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta.64.${idx_ext}"
  done
# SNP & INDEL
# Download the 1000G phase 1 high-confidence SNP VCF and the Mills and 1000G
# gold-standard indel VCF, each with its .tbi index, into the current
# directory (that is /home/lzn/WES/genome/index when run right after the
# BWA index step).
# storage.googleapis.com is the public download endpoint;
# storage.cloud.google.com requires a browser sign-in and does not work
# with wget.
for vcf_file in \
  1000G_phase1.snps.high_confidence.hg38.vcf.gz \
  1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi \
  Mills_and_1000G_gold_standard.indels.hg38.vcf.gz \
  Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi; do
  wget "https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/${vcf_file}"
done