Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{bio}[foss/2020b] ABRA2 v2.23, CRISPR-DAV v2.3.4, Excel-Writer-XLSX v1.09, FLASH v2.2.00, PRINSEQ v0.20.4, pysamstats v1.1.2 w/ Python 3.8.6 #13139

Merged
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c49ebef
adding easyconfigs: naturalsort-0.1.3-foss-2020b.eb, pysamstats-1.1.2…
Jun 14, 2021
af9d999
less strict requirements of pysamstats
Jun 14, 2021
c39a53a
repaired source file name
Jun 14, 2021
a91aa34
changed http to https
Jun 14, 2021
a9662fa
changes according to review
Jun 15, 2021
69c6bdb
WIP removing hardcoded paths
Jun 15, 2021
5443b56
functional crispr-dav
Jun 23, 2021
974e09c
flash2 should be accessible via `flash` cmd
Jun 28, 2021
c06d427
review changes
Jun 29, 2021
b8bf8f4
setting crisprdav easyblock explicitely
Aug 17, 2021
d2c98b1
Delete crispr-dav-2.3.4-foss-2020b.eb
deniskristak Aug 17, 2021
0333231
removing explicit crispr-dav easyblock
Aug 17, 2021
fd5fbdc
Update CRISPR-DAV-2.3.4-foss-2020b.eb
deniskristak Aug 17, 2021
7e59428
Delete crispr-dav-2.3.4-foss2020b_remove_hardcoding.patch
deniskristak Aug 18, 2021
59b8101
Rename crispr-dav-2.3.4-foss2020b_remove_hardcoding.patch to crispr-d…
deniskristak Aug 18, 2021
37abf2d
Update CRISPR-DAV-2.3.4-foss-2020b.eb
deniskristak Aug 18, 2021
3b79f49
Create CRISPR-DAV-2.3.4-foss-2020b.eb
deniskristak Aug 18, 2021
daf8db7
Update Excel-Writer-XLSX-1.09-foss-2020b.eb
deniskristak Aug 18, 2021
852ce7a
Update FLASH-2.2.00-foss-2020b.eb
deniskristak Aug 18, 2021
37cc749
Update FLASH-2.2.00-foss-2020b.eb
deniskristak Aug 18, 2021
0464e66
Delete naturalsort-0.1.3-foss-2020b.eb
deniskristak Aug 18, 2021
1b1c3c6
Update pysamstats-1.1.2-foss-2020b.eb
deniskristak Aug 18, 2021
f231d83
Update CRISPR-DAV-2.3.4-foss-2020b.eb
deniskristak Aug 19, 2021
47df656
fix suggestions for ABRA2, CRISPR-DAV, PRINSEQ easyconfigs
boegel Aug 19, 2021
db6b328
fix long line in Excel-Writer-XLSX easyconfig
boegel Aug 19, 2021
c062a0e
stick to SAMtools 1.11 as dependency for CRISPR-DAV
boegel Aug 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions easybuild/easyconfigs/a/ABRA2/ABRA2-2.23-GCC-10.2.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
easyblock = 'MakeCp'

name = 'ABRA2'
version = '2.23'

homepage = 'https://github.com/mozack/abra2'
description = "Assembly Based ReAligner"

toolchain = {'name': 'GCC', 'version': '10.2.0'}

source_urls = ['https://github.com/mozack/abra2/archive/']
sources = ['v%(version)s.tar.gz']
patches = ['ABRA2-2.22_fix-Makefile.patch']
checksums = [
'3993f66a493070ee49df2865b6786a45a0cf6c379bae83e94b8339abbe673289', # v2.23.tar.gz
'05090efb306fc84d09f007a848ce0d0472f8633633b0a6eaf86ab075d092bc0d', # ABRA2-2.22_fix-Makefile.patch
]

builddependencies = [('Maven', '3.6.3', '', True)]

dependencies = [
('Java', '11', '', True),
('BWA', '0.7.17'),
]

parallel = 1

# extra arguments for the build command: pass the toolchain's C++ compiler and flags through,
# then chain a second `make` invocation for the `standalone` target with the same settings
buildopts = (
    'CXX="$CXX" CXXFLAGS="$CXXFLAGS"'
    '&& make standalone CXX="$CXX" CXXFLAGS="$CXXFLAGS"'
)

files_to_copy = [
(['abra'], 'bin'),
(['target/libAbra.%s' % SHLIB_EXT], 'lib'),
'target/abra2-%(version)s-jar-with-dependencies.jar',
]

postinstallcmds = ["cd %(installdir)s && mv abra2-%(version)s-jar-with-dependencies.jar abra2-%(version)s.jar"]

sanity_check_paths = {
'files': ['abra2-%(version)s.jar', 'bin/abra', 'lib/libAbra.%s' % SHLIB_EXT],
boegel marked this conversation as resolved.
Show resolved Hide resolved
'dirs': [],
}

modextravars = {
"LC_ALL": "en_US.UTF-8",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@deniskristak Hmm, why is this needed? That looks a bit fishy, it doesn't belong in a module generated by EasyBuild...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ABRA2 needs it because of this:
mozack/abra2#25

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, please add a comment with a reference to that issue then:

# required to work around localization bug, see https://github.com/mozack/abra2/issues/25
modextravars = {'LC_ALL': 'en_US.UTF-8'}

}

moduleclass = 'bio'
36 changes: 36 additions & 0 deletions easybuild/easyconfigs/c/CRISPR-DAV/CRISPR-DAV-2.3.4-foss-2020b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name = 'CRISPR-DAV'
version = '2.3.4'

homepage = 'https://github.com/pinetree1/crispr-dav/'
description = """CRISPR-DAV is a pipeline to analyze
amplicon-based NGS data of CRISPR clones in a high throughput manner."""

toolchain = {'name': 'foss', 'version': '2020b'}

source_urls = ['https://github.com/pinetree1/crispr-dav/archive/']
sources = ['v%(version)s.tar.gz']
patches = ['crispr-dav-2.3.4_remove_hardcoding.patch']
boegel marked this conversation as resolved.
Show resolved Hide resolved
checksums = [
'49975cd48bdbf31fe5a9e2aaa3f5ed85d3cc6f65a422ee3aa8daed890159d2ae', # v2.3.4.tar.gz
# crispr-dav-2.3.4_remove_hardcoding.patch
'41475a09754d65cc2c88d4161be6eacdfc98242bccbe49778c4f1f74210a6cb2',
]

# also needs `naturalsort`, which is now an R extension - see https://github.com/easybuilders/easybuild-easyconfigs/pull/13762
# if missing, try reinstalling R (https://docs.easybuild.io/en/latest/Partial_installations.html#installing-additional-extensions-using-k-skip)
boegel marked this conversation as resolved.
Show resolved Hide resolved
dependencies = [
('R', '4.0.3'),
('Perl', '5.32.0'),
('NGS', '2.10.9'),
('Pysam', '0.16.0.1'),
('Excel-Writer-XLSX', '1.09'),
('pysamstats', '1.1.2'),
('ABRA2', '2.23'),
('PRINSEQ', '0.20.4', '-Perl-%(perlver)s'),
('SAMtools', '1.12'),
('BEDTools', '2.30.0'),
('FLASH', '2.2.00'),
('Archive-Zip', '1.68'),
]

moduleclass = 'bio'
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# author: Denis Kristak (INUITS)
# changing example command to contain absolute paths + tabs escaping + changing plotting type to cairo
diff -ruN crispr-dav-2.3.4_orig/Examples/example1/run.sh crispr-dav-2.3.4/Examples/example1/run.sh
--- crispr-dav-2.3.4_orig/Examples/example1/run.sh 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Examples/example1/run.sh 2021-06-22 12:57:41.700453098 +0200
@@ -4,6 +4,7 @@

## If there is problem loading Perl modules., set PERL5LIB accordingly.
#export PERL5LIB=$HOME/perlmod/lib/perl5:$PERL5LIB
-../../crispr.pl --conf conf.txt --region amplicon.bed --crispr site.bed \
- --sitemap sample.site --fastqmap fastq.list --genome genomex
+
+$EBROOTCRISPRMINDAV/crispr.pl --conf $EBROOTCRISPRMINDAV/Examples/example1/conf.txt --region $EBROOTCRISPRMINDAV/Examples/example1/amplicon.bed --crispr $EBROOTCRISPRMINDAV/Examples/example1/site.bed \
+ --sitemap $EBROOTCRISPRMINDAV/Examples/example1/sample.site --fastqmap $EBROOTCRISPRMINDAV/Examples/example1/fastq.list --genome genomex --verbose 1

diff -ruN crispr-dav-2.3.4_orig/Modules/NGS.pm crispr-dav-2.3.4/Modules/NGS.pm
--- crispr-dav-2.3.4_orig/Modules/NGS.pm 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Modules/NGS.pm 2021-06-21 17:37:39.381993860 +0200
@@ -296,7 +296,7 @@
my $cmd = "$self->{bwa} mem";
$cmd .= " $h{param}" if $h{param};
if ( $h{id} && $h{sm} ) {
- $cmd .= " -R \'\@RG\tID:$h{id}\tSM:$h{sm}\tPL:$h{pl}\'";
+ $cmd .= " -R \'\@RG\\tID:$h{id}\\tSM:$h{sm}\\tPL:$h{pl}\'";
}
$cmd .= " $h{idxbase} $h{read1_inf}";
$cmd .= " $h{read2_inf}" if -f $h{read2_inf};
@@ -558,7 +558,7 @@
# file is already indexed.
my $cmd = "rm -rf $workdir && mkdir -p $workdir" .
" && $self->{java} -Djava.io.tmpdir=$tmpdir -jar $h{abra} --threads 2" .
- " --ref $h{ref_fasta} --targets $h{target_bed} --working $workdir" .
+ " --ref $h{ref_fasta} --targets $h{target_bed} --tmpdir $workdir" .
" --in $h{bam_inf} --out $h{bam_outf}";

if ($h{single}) {
diff -ruN crispr-dav-2.3.4_orig/Rscripts/allele.R crispr-dav-2.3.4/Rscripts/allele.R
--- crispr-dav-2.3.4_orig/Rscripts/allele.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/allele.R 2021-06-22 13:13:45.868418215 +0200
@@ -114,7 +114,7 @@
} else {
h<-400
w<- ifelse(n>40, 13*n, h*1.25)
- png(filename=outfile, height=h, width=w)
+ png(filename=outfile, type='cairo', height=h, width=w)
}

on.exit(dev.off())
diff -ruN crispr-dav-2.3.4_orig/Rscripts/amplicon.R crispr-dav-2.3.4/Rscripts/amplicon.R
--- crispr-dav-2.3.4_orig/Rscripts/amplicon.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/amplicon.R 2021-06-22 13:13:45.868418215 +0200
@@ -111,7 +111,7 @@
if ( high_res ) {
tiff(filename=outfile, width=5, height=4, units='in', res=1200)
} else {
- png(filename=outfile, width=500, height=400)
+ png(filename=outfile, type='cairo', width=500, height=400)
}
on.exit(dev.off())
print(p)
diff -ruN crispr-dav-2.3.4_orig/Rscripts/hdr.R crispr-dav-2.3.4/Rscripts/hdr.R
--- crispr-dav-2.3.4_orig/Rscripts/hdr.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/hdr.R 2021-06-22 13:13:45.868418215 +0200
@@ -102,7 +102,7 @@
h<-400
barspace=60
w<- ifelse( n*barspace<h, h, n*barspace)
- png(filename=outfile, width=w, height=h)
+ png(filename=outfile, type='cairo', width=w, height=h)
}

print(p)
diff -ruN crispr-dav-2.3.4_orig/Rscripts/indel.R crispr-dav-2.3.4/Rscripts/indel.R
--- crispr-dav-2.3.4_orig/Rscripts/indel.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/indel.R 2021-06-22 13:27:15.254201732 +0200
@@ -81,7 +81,7 @@
} else {
h<-450
w<- ifelse(n>10, 50*n, h)
- png(filename=imgfile, height=h, width=w)
+ png(filename=imgfile, type='cairo', height=h, width=w)
}

on.exit(dev.off())
diff -ruN crispr-dav-2.3.4_orig/Rscripts/read_chr.R crispr-dav-2.3.4/Rscripts/read_chr.R
--- crispr-dav-2.3.4_orig/Rscripts/read_chr.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/read_chr.R 2021-06-22 13:13:45.868418215 +0200
@@ -72,7 +72,7 @@
w<-ifelse(n>5, 100*n, 550)
max_w=1000
w <- ifelse(w>max_w, max_w, w)
- png(filename=outfile, height=h, width=w)
+ png(filename=outfile, type='cairo', height=h, width=w)
}

print(p)
diff -ruN crispr-dav-2.3.4_orig/Rscripts/read_stats.R crispr-dav-2.3.4/Rscripts/read_stats.R
--- crispr-dav-2.3.4_orig/Rscripts/read_stats.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/read_stats.R 2021-06-22 13:13:45.868418215 +0200
@@ -69,7 +69,7 @@
} else {
h<-400
w<- ifelse(n>10, 50*n, h)
- png(filename=outfile, height=h, width=w)
+ png(filename=outfile, type='cairo', height=h, width=w)
}

p <- ggplot(dat.m, aes(x=Sample, y=value, fill=variable)) +
diff -ruN crispr-dav-2.3.4_orig/Rscripts/snp.R crispr-dav-2.3.4/Rscripts/snp.R
--- crispr-dav-2.3.4_orig/Rscripts/snp.R 2019-07-26 21:15:08.000000000 +0200
+++ crispr-dav-2.3.4/Rscripts/snp.R 2021-06-22 13:13:45.868418215 +0200
@@ -76,7 +76,7 @@
if ( high_res ) {
tiff(filename=outfile, width=5, height=4, units='in', res=1200)
} else {
- png(filename=outfile, width=500, height=400)
+ png(filename=outfile, type='cairo', width=500, height=400)
}
print(p)
invisible(dev.off())
@@ -182,7 +182,7 @@
wt <- 600
}
ht <- 400
- png(filename=outfile, width=wt, height=ht)
+ png(filename=outfile, type='cairo', width=wt, height=ht)
}

print(p)
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
easyblock = 'PerlModule'

name = 'Excel-Writer-XLSX'
version = '1.09'

homepage = 'https://metacpan.org/pod/Excel::Writer::XLSX'
description = """The Excel::Writer::XLSX module can be used to create an Excel file in the 2007+ XLSX format.
Multiple worksheets can be added to a workbook and formatting can be applied to cells. Text, numbers, and formulas
can be written to the cells."""

toolchain = {'name': 'foss', 'version': '2020b'}

source_urls = ['https://cpan.metacpan.org/authors/id/J/JM/JMCNAMARA/']
sources = ['%(name)s-%(version)s.tar.gz']
checksums = ['d679c6ac19e93c32ab77594c793e41b948c7bb3873b600e70ad637d093dca187']

dependencies = [
('Perl', '5.32.0'),
('Archive-Zip', '1.68'),
]

options = {'modulename': 'Excel::Writer::XLSX'}

sanity_check_paths = {
'files': ['bin/extract_vba'],
deniskristak marked this conversation as resolved.
Show resolved Hide resolved
'dirs': ['lib/perl5/site_perl/%(perlver)s/Excel/Writer/XLSX'],
}

# `extract_vba --help` must print its usage text; grep for a known phrase to confirm the script really runs
sanity_check_commands = [
    'extract_vba --help 2>&1 | grep "This utility is used to extract the VBA project binary from an Excel"',
]


moduleclass = 'tools'
34 changes: 34 additions & 0 deletions easybuild/easyconfigs/f/FLASH/FLASH-2.2.00-foss-2020b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
easyblock = 'MakeCp'

name = 'FLASH'
version = '2.2.00'

homepage = 'https://ccb.jhu.edu/software/FLASH/'
description = """FLASH (Fast Length Adjustment of SHort reads) is a very fast
and accurate software tool to merge paired-end reads from next-generation
sequencing experiments. FLASH is designed to merge pairs of reads when the
original DNA fragments are shorter than twice the length of reads. The
resulting longer reads can significantly improve genome assemblies. They can
also improve transcriptome assembly when FLASH is used to merge RNA-seq data.
"""

toolchain = {'name': 'foss', 'version': '2020b'}

source_urls = ['https://github.com/dstreett/FLASH2/archive/']
sources = ['%(version)s.zip']
checksums = ['1e54b2dd7d21ca3e0595a3ffdd27ef3098f88c4de5b9302ec5ea074b49b79960']

# copy the built `flash2` binary into bin/
files_to_copy = [
    (['flash2'], 'bin'),
]

# make the tool reachable via the `flash` command as well, through a symlink next to flash2
postinstallcmds = [
    "cd %(installdir)s/bin && ln -s flash2 flash",
]

# both the binary and the `flash` symlink must exist after installation
sanity_check_paths = {
    'files': ['bin/flash2', 'bin/flash'],
    'dirs': [],
}

sanity_check_commands = ["flash --help"]

moduleclass = 'bio'
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# author: Denis Kristak

easyblock = 'Tarball'

name = 'PRINSEQ'
version = '0.20.4'
versionsuffix = '-Perl-%(perlver)s'

homepage = 'http://prinseq.sourceforge.net'
description = """A bioinformatics tool to PRe-process and show INformation of SEQuence data."""

toolchain = {'name': 'foss', 'version': '2020b'}

# download over HTTPS, in line with the other source URLs in this set of easyconfigs
source_urls = ['https://sourceforge.net/projects/prinseq/files/standalone/']
sources = ['%(namelower)s-lite-%(version)s.tar.gz']
checksums = ['9b5e0dce3b7f02f09e1cc7e8a2dd77c0b133e5e35529d570ee901f53ebfeb56f']

dependencies = [
('Perl', '5.32.0'),
('cairo', '1.16.0'),
]

# these are the perl libraries dependencies
exts_defaultclass = 'PerlModule'
exts_filter = ("perldoc -lm %(ext_name)s ", "")

exts_list = [
('ExtUtils::Depends', '0.8001', {
'source_tmpl': 'ExtUtils-Depends-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/X/XA/XAOC/'],
'checksums': ['673c4387e7896c1a216099c1fbb3faaa7763d7f5f95a1a56a60a2a2906c131c5'],
}),
('ExtUtils::PkgConfig', '1.16', {
'source_tmpl': 'ExtUtils-PkgConfig-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/X/XA/XAOC/'],
'checksums': ['bbeaced995d7d8d10cfc51a3a5a66da41ceb2bc04fedcab50e10e6300e801c6e'],
}),
('Pod::Usage', '2.0', {
'source_tmpl': 'Pod-Usage-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/M/MA/MAREKR/'],
'checksums': ['530943a9ac3ba00404d7be8ee8572f30f6db9de123cd725af3647333a87d4fea'],
}),
('Cairo', '1.109', {
'source_tmpl': 'Cairo-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/X/XA/XAOC/'],
'checksums': ['8219736e401c2311da5f515775de43fd87e6384b504da36a192f2b217643077f'],
}),
('Statistics::PCA', '0.0.1', {
'source_tmpl': 'Statistics-PCA-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/D/DS/DSTH/'],
'checksums': ['f8adb10b00232123d103a5b49161ad46370f47fe0f752e5462a4dc15f9d46bc4'],
}),
('MIME::Base64', '3.15', {
'source_tmpl': 'MIME-Base64-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/G/GA/GAAS/'],
'checksums': ['7f863566a6a9cb93eda93beadb77d9aa04b9304d769cea3bb921b9a91b3a1eb9'],
}),
('Math::Cephes::Matrix', '0.5305', {
'source_tmpl': 'Math-Cephes-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/S/SH/SHLOMIF/'],
'checksums': ['561a800a4822e748d2befc366baa4b21e879a40cc00c22293c7b8736caeb83a1'],
}),
('Math::MatrixReal', '2.13', {
'source_tmpl': 'Math-MatrixReal-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/L/LE/LETO/'],
'checksums': ['4f9fa1a46dd34d2225de461d9a4ed86932cdd821c121fa501a15a6d4302fb4b2'],
}),
('Text::SimpleTable', '2.07', {
'source_tmpl': 'Text-SimpleTable-%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/M/MR/MRAMBERG/'],
'checksums': ['256d3f38764e96333158b14ab18257b92f3155c60d658cafb80389f72f4619ed'],
}),
('Contextual::Return', '0.2.1', {
'source_tmpl': 'Contextual-Return-v%(version)s.tar.gz',
'source_urls': ['https://cpan.metacpan.org/authors/id/D/DC/DCONWAY/'],
'checksums': ['17a1ed1043a2abe123920894d6023709e834de66ef766be86e4cfbb96b1dd16c'],
}),
]

# this is the script which relies on many extra Perl libraries, so we verify it's working fine
sanity_check_commands = ['%(installdir)s/prinseq-graphs.pl']

modextrapaths = {
boegel marked this conversation as resolved.
Show resolved Hide resolved
'PATH': '',
'PERL5LIB': 'lib/perl5/site_perl/%(perlver)s/',
}

postinstallcmds = [
    # replace the hardcoded /usr/bin/perl with a lookup via `env`, so that whichever perl is first in $PATH is used;
    # a plain space is enough inside the single-quoted sed expression (no backslash, which Python would
    # flag as an invalid escape sequence in a regular string literal)
    "sed -i -e 's|/usr/bin/perl|/usr/bin/env perl|' %(installdir)s/*.pl",
    # make the scripts executable
    "chmod +x %(installdir)s/*.pl",
]

# the three PRINSEQ scripts are expected at the top level of the installation directory
sanity_check_paths = {
    'files': [
        'prinseq-lite.pl',
        'prinseq-graphs.pl',
        'prinseq-graphs-noPCA.pl',
    ],
    'dirs': [],
}


boegel marked this conversation as resolved.
Show resolved Hide resolved
moduleclass = 'bio'
Loading