# alevin-fry
#
# Per-sample pipeline, written to run as one task of a SLURM job array:
#   1. pick the sample for this array task from the sample list,
#   2. convert the Cell Ranger BAM file back into FASTQ files,
#   3. locate the Gene Expression library among the extracted FASTQ folders,
#   4. map the reads with `salmon alevin --sketch`.
# The alevin-fry steps further down consume ${OUT_DIR_SAMPLE}/salmon.
#
# Required environment:
#   SLURM_ARRAY_TASK_ID  1-based line number of the sample in the sample list.

# Abort on the first failing command, on unset variables and on failures
# anywhere inside a pipeline, so later stages never run on missing inputs.
set -euo pipefail

# Print an error message to stderr and terminate the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Directory the script was started from. The script no longer changes
# directory, so every relative path below is resolved against it.
SCRIPT_DIR=$(pwd)
OUT_DIR='./results'
CELLRANGER_OUTPUTS='/home/placeholder/bam_dir'
SALMON_INDEX='./salmon_index/'
tgMap_FILE='tg2.txt'
CELLRANGER_BARCODES=/apps/prod/easybuild/sl7.x86_64.foss-2021a/software/cellranger/7.0.0/lib/python/cellranger/barcodes/737K-august-2016.txt
# Thread count handed to cellranger bamtofastq and salmon alevin.
N_THREADS=16

mkdir -p "${OUT_DIR}"

# Input list with name of all samples to run scHLAtype on
input_list='list_of_samples.txt'
[[ -r "${input_list}" ]] || die "sample list '${input_list}' is missing or not readable"
readarray -t array_list < "${input_list}"

# Validate the array task id before indexing: an unset or zero id would
# otherwise resolve to index -1 and silently select the last sample.
task_id=${SLURM_ARRAY_TASK_ID:-}
[[ "${task_id}" =~ ^[0-9]+$ ]] \
  || die "SLURM_ARRAY_TASK_ID must be a positive integer (got '${task_id}')"
# 10# forces base 10 so ids with leading zeros are not parsed as octal.
task_index=$(( 10#${task_id} - 1 ))
(( task_index >= 0 && task_index < ${#array_list[@]} )) \
  || die "SLURM_ARRAY_TASK_ID=${task_id} is outside 1..${#array_list[@]} (${input_list})"
SAMPLE_NAME=${array_list[task_index]}
[[ -n "${SAMPLE_NAME}" ]] || die "line ${task_id} of ${input_list} is empty"

# Output folder for each sample
OUT_DIR_SAMPLE=${OUT_DIR}/${SAMPLE_NAME}

# Path to BAM file that was created by cellranger multi
ORIGINAL_BAM_FILE=${CELLRANGER_OUTPUTS}/${SAMPLE_NAME}/outs/possorted_genome_bam.bam
[[ -f "${ORIGINAL_BAM_FILE}" ]] || die "BAM file not found: ${ORIGINAL_BAM_FILE}"

echo "check1"

# Extract fastq files
cellranger bamtofastq \
  --nthreads="${N_THREADS}" \
  --reads-per-fastq=9223372036854775807 \
  "${ORIGINAL_BAM_FILE}" \
  "${OUT_DIR_SAMPLE}"

# Check header to know which library contains Gene Expression
regex_expr='"library_id":([0-9]+)'
samtools_header=$(samtools view -H "${ORIGINAL_BAM_FILE}" | grep "Gene Expression") \
  || die "could not find a \"Gene Expression\" library in the header of ${ORIGINAL_BAM_FILE}"
printf '%s\n' "${samtools_header}"

# Do actual regex to know which library contains Gene Expression
[[ "${samtools_header}" =~ ${regex_expr} ]] \
  || die "no library_id found in the Gene Expression header line(s)"
library_number=${BASH_REMATCH[1]}

echo "check2"

# Extracted FASTQ folders carry "_<library id>_1_" in their name; pick the
# one(s) belonging to the Gene Expression library with a glob instead of
# parsing ls output.
combine_grep="_${library_number}_1_"
shopt -s nullglob
gex_folders=("${OUT_DIR_SAMPLE}"/*"${combine_grep}"*/)
(( ${#gex_folders[@]} > 0 )) \
  || die "no folder matching '*${combine_grep}*' found in ${OUT_DIR_SAMPLE}"

# Get fastq files (absolute paths, R1 and R2 in the same sorted order)
FASTQ_ONE=()
FASTQ_TWO=()
for gex_folder in "${gex_folders[@]}"; do
  gex_folder=$(realpath "${gex_folder}")
  echo "Identified ${gex_folder} as correct Gene Expression library"
  FASTQ_ONE+=("${gex_folder}"/*_R1_*)
  FASTQ_TWO+=("${gex_folder}"/*_R2_*)
done
shopt -u nullglob

(( ${#FASTQ_ONE[@]} > 0 )) || die "no *_R1_* FASTQ files found for ${SAMPLE_NAME}"
(( ${#FASTQ_ONE[@]} == ${#FASTQ_TWO[@]} )) \
  || die "found ${#FASTQ_ONE[@]} R1 but ${#FASTQ_TWO[@]} R2 FASTQ files for ${SAMPLE_NAME}"

# NOTE(review): this path is hard-coded and independent of OUT_DIR; confirm it
# is the intended results directory. A failure here only produces a warning.
chmod -R 770 ~/salmon/results \
  || echo "WARNING: could not change permissions of ~/salmon/results" >&2

echo "check3"

# Run salmon alevin
salmon alevin \
  -l ISF \
  -1 "${FASTQ_ONE[@]}" \
  -2 "${FASTQ_TWO[@]}" \
  --chromium \
  -i "${SALMON_INDEX}" \
  -p "${N_THREADS}" \
  -o "${OUT_DIR_SAMPLE}/salmon" \
  --tgMap "${tgMap_FILE}" \
  --sketch

# Run alevin fry
# generate-permit-list: we can now use the permit list to scan the cell
# barcodes actually encountered in our reads and determine the set of cells
# that were likely present in our sample.
echo "check4"
# alevin-fry quantification of the current sample.
#
# Consumes the salmon alevin output in ${OUT_DIR_SAMPLE}/salmon and runs, in
# order: generate-permit-list, collate, quant and infer. Each stage depends on
# the files written by the previous one, so the script stops at the first
# stage that fails.
#
# Reads OUT_DIR_SAMPLE, CELLRANGER_BARCODES and tgMap_FILE, which must be set
# earlier in the script; an empty OUT_DIR_SAMPLE would otherwise make every
# path below resolve under /salmon.
: "${OUT_DIR_SAMPLE:?OUT_DIR_SAMPLE must be set before running alevin-fry}"
: "${CELLRANGER_BARCODES:?CELLRANGER_BARCODES must be set before running alevin-fry}"
: "${tgMap_FILE:?tgMap_FILE must be set before running alevin-fry}"

# Thread count handed to the multi-threaded alevin-fry stages.
fry_threads=16
salmon_dir="${OUT_DIR_SAMPLE}/salmon"
fry_dir="${salmon_dir}/alevin_fry"

# Report which alevin-fry stage failed and terminate the script.
fry_fail() {
  printf 'ERROR: alevin-fry %s failed for %s\n' "$1" "${OUT_DIR_SAMPLE}" >&2
  exit 1
}

alevin-fry generate-permit-list \
  --input "${salmon_dir}" \
  --expected-ori rc \
  --output-dir "${fry_dir}" \
  --unfiltered-pl "${CELLRANGER_BARCODES}" \
  || fry_fail "generate-permit-list"

echo "check5"

# collate: given the permit list and barcode mapping (which reside in the
# alevin_fry directory), collate the original RAD file.
alevin-fry collate \
  -i "${fry_dir}" \
  --rad-dir "${salmon_dir}/" \
  -t "${fry_threads}" \
  || fry_fail "collate"

echo "check6"

# quant: finally, quantify the collated RAD file using the cr-like-em
# resolution strategy. The output is written into the same alevin_fry
# directory; its alevin/ subfolder is what the infer stage reads below.
alevin-fry quant \
  -i "${fry_dir}" \
  -m "${tgMap_FILE}" \
  --resolution cr-like-em \
  -o "${fry_dir}" \
  --dump-eqclasses \
  --use-mtx \
  -t "${fry_threads}" \
  || fry_fail "quant"

echo "check7"

# Run infer
alevin-fry infer \
  --count-mat "${fry_dir}/alevin/geqc_counts.mtx" \
  --eq-labels "${fry_dir}/alevin/gene_eqclass.txt.gz" \
  --output-dir "${OUT_DIR_SAMPLE}/salmon/alevin_fry_final" \
  -t "${fry_threads}" \
  || fry_fail "infer"