阅读:8255回复:0
全外显子分析(WES)的一般步骤-6 过滤篇
直接比对出来的结果非常乱,会有一堆假阳性,所以需要与金标准进行比对,同时去除一些低质量的背景噪音
**注: REF,跟hg_ref是同一定义,有些地方我用的hg19_ref,有些地方我直接用REF,其实应该统一,我懒的改了。。 ############################################################# # Step 10. Filter out low quality mapping ############################################################# # use bamtools filter $bamtools filter -mapQuality ">50" -in $OUTPUT/${subjectID}.rmdup.bam -out $OUTPUT/${subjectID}.mq.srt.bam # indexing $samtools index $OUTPUT/${subjectID}.mq.srt.bam ############################################################# # Step 11. Create target intervals for realignment ############################################################# # make a temp library mkdir -p temp_realign # use GATK to create intervals java -Xmx${java_heapsize}m -Djava.io.tmpdir=temp_realign \ -jar $gatk \ -T RealignerTargetCreator \ -fixMisencodedQuals \ -I $OUTPUT/${subjectID}.mq.srt.bam \ -R $REF \ -known $DBSNP \ -nt $CPUs \ -o $OUTPUT/${subjectID}.forRealign.intervals \ -L $ExonFile \ -S LENIENT # remove the temp directory rm -rf temp_realign ############################################################# # Step 12. Realignment ############################################################# # make a temp directory mkdir -p temp_realign # GATK realignment java -Xmx${java_heapsize}m -Djava.io.tmpdir=temp_realign \ -jar $gatk \ -T IndelRealigner \ -fixMisencodedQuals \ -targetIntervals $OUTPUT/${subjectID}.forRealign.intervals \ -I $OUTPUT/${subjectID}.mq.srt.bam \ -o $OUTPUT/${subjectID}.realigned.bam \ -R $REF \ -known $DBSNP \ -L $ExonFile \ -S LENIENT # remove the temp directory rm -rf temp_realign # sort and index $samtools sort -m 3000000000 $OUTPUT/${subjectID}.realigned.bam $OUTPUT/${subjectID}.realigned.srt $samtools index $OUTPUT/${subjectID}.realigned.srt.bam ############################################################# # Step 13. Recalibrate the quality score ############################################################# # make a temp directory mkdir -p temp_recal # GATK BaseRecalibrator for first pass of the base quality score recalibartion java -Xmx${java_heapsize}m -Djava.io.tmpdir=temp_recal \ -jar $gatk \ -T BaseRecalibrator \ -I $OUTPUT/${subjectID}.realigned.srt.bam \ -R $REF \ -knownSites $DBSNP \ -nct $CPUs \ -cov ReadGroupCovariate \ -cov QualityScoreCovariate \ -cov CycleCovariate \ -cov ContextCovariate \ -o $OUTPUT/${subjectID}.flt.recal.table \ -l INFO \ -L $ExonFile \ -S LENIENT # GATK PrintReads, which renders all reads from the input data java -Xmx${java_heapsize}m -Djava.io.tmpdir=temp_recal \ -jar $gatk \ -T PrintReads \ -R $REF \ -nct $CPUs \ -I $OUTPUT/${subjectID}.realigned.srt.bam \ -BQSR $OUTPUT/${subjectID}.flt.recal.table \ -o $OUTPUT/${subjectID}.realigned.recal.bam \ -l INFO \ -L $ExonFile \ -S LENIENT # generate AnalyzeCovariates plots java -Xmx${java_heapsize}m -Djava.io.tmpdir=temp_recal \ -jar $gatk \ -T AnalyzeCovariates \ -R $REF \ -BQSR $OUTPUT/${subjectID}.flt.recal.table # remove the temp directory rm -rf temp_recal # statistics ${bamtools} stats -insert -in $OUTPUT/${subjectID}.realigned.recal.bam > $OUTPUT/${subjectID}.realigned.recal.stats |
|