1. 리눅스에서 바로 확인하기
# Count the reads in every gzipped FASTQ file in the current directory and
# append one "<file>: <reads>" line per file to counts.txt.
# Assumes standard 4-line FASTQ records (no wrapped sequence/quality lines),
# so the read count is the line count divided by 4.
for fq in ./*.fastq.gz; do
  # An unmatched glob stays as the literal pattern; skip it so that no bogus
  # "./*.fastq.gz: 0" entry is written when the directory has no FASTQ files.
  [ -e "${fq}" ] || continue
  lines=$(zcat "${fq}" | wc -l)
  # Integer shell arithmetic truncates like `bc` with scale=0, without needing bc.
  echo "${fq}: $(( lines / 4 ))"
done >>counts.txt
출력물 (counts.txt에 저장됨) ↓
./001AcCUSw_trim_1.fastq.gz: 58358
./001AcCUSw_trim_2.fastq.gz: 58358
./001AcLUSw_3_trim_1.fastq.gz: 57066
./001AcLUSw_3_trim_2.fastq.gz: 57066
./001AcLUSw_4_trim_1.fastq.gz: 59239
./001AcLUSw_4_trim_2.fastq.gz: 59239
./001AcRUSw_1_trim_1.fastq.gz: 61334
./001AcRUSw_1_trim_2.fastq.gz: 61334
./001AcRUSw_2_trim_1.fastq.gz: 52160
./001AcRUSw_2_trim_2.fastq.gz: 52160
2. Seqkit을 사용하기
# Install seqkit from the bioconda channel.
conda install --channel bioconda seqkit
# Print per-file summary statistics for every gzipped FASTQ file here.
seqkit stats *.fastq.gz
# To save the result to stats.tsv in tab-separated form instead:
seqkit stats --tabular --out-file stats.tsv *.fastq.gz
출력물↓
file format type num_seqs sum_len min_len avg_len max_len
PHB_Rep1_R1.fastq.gz FASTQ RNA 118571 11857100 100 100.0 100
PHB_Rep1_R2.fastq.gz FASTQ RNA 118571 11857100 100 100.0 100
PHB_Rep2_R1.fastq.gz FASTQ RNA 144826 14482600 100 100.0 100
PHB_Rep2_R2.fastq.gz FASTQ RNA 144826 14482600 100 100.0 100
PHB_Rep3_R1.fastq.gz FASTQ RNA 129786 12978600 100 100.0 100
PHB_Rep3_R2.fastq.gz FASTQ RNA 129786 12978600 100 100.0 100
PUH_Rep1_R1.fastq.gz FASTQ RNA 227392 22739200 100 100.0 100
PUH_Rep1_R2.fastq.gz FASTQ RNA 227392 22739200 100 100.0 100
PUH_Rep2_R1.fastq.gz FASTQ RNA 162373 16237300 100 100.0 100
PUH_Rep2_R2.fastq.gz FASTQ RNA 162373 16237300 100 100.0 100
PUH_Rep3_R1.fastq.gz FASTQ RNA 185442 18544200 100 100.0 100
PUH_Rep3_R2.fastq.gz FASTQ RNA 185442 18544200 100 100.0 100
| 참고
- https://www.biostars.org/p/139006/
반응형