Skip to content
Snippets Groups Projects
bioinformatics.scm 106 KiB
Newer Older
  • Learn to ignore specific revisions
  • Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;;; GNU Guix --- Functional package management for GNU
    
    ;;; Copyright © 2014, 2015 Ricardo Wurmus <rekado@elephly.net>
    
    Ben Woodcroft's avatar
    Ben Woodcroft committed
    ;;; Copyright © 2015 Ben Woodcroft <donttrustben@gmail.com>
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;;;
    ;;; This file is part of GNU Guix.
    ;;;
    ;;; GNU Guix is free software; you can redistribute it and/or modify it
    ;;; under the terms of the GNU General Public License as published by
    ;;; the Free Software Foundation; either version 3 of the License, or (at
    ;;; your option) any later version.
    ;;;
    ;;; GNU Guix is distributed in the hope that it will be useful, but
    ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
    ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    ;;; GNU General Public License for more details.
    ;;;
    ;;; You should have received a copy of the GNU General Public License
    ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
    
    (define-module (gnu packages bioinformatics)
      #:use-module ((guix licenses) #:prefix license:)
      #:use-module (guix packages)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix utils)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix download)
    
      #:use-module (guix git-download)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix build-system gnu)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix build-system cmake)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix build-system perl)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix build-system python)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (guix build-system trivial)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages)
    
    Ben Woodcroft's avatar
    Ben Woodcroft committed
      #:use-module (gnu packages algebra)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages base)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages boost)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages compression)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages cpio)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages file)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages java)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages linux)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages machine-learning)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages maths)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages ncurses)
      #:use-module (gnu packages perl)
      #:use-module (gnu packages pkg-config)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages popt)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages protobuf)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages python)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages statistics)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages tbb)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages textutils)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages vim)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages web)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages xml)
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
      #:use-module (gnu packages zip)
      #:use-module (srfi srfi-1))
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    
    
    Ben Woodcroft's avatar
    Ben Woodcroft committed
    ;; ARAGORN is built by invoking gcc on one versioned C file; the standard
    ;; configure phase is removed and the build and install phases are replaced
    ;; by the hand-written ones below.
    (define-public aragorn
      (package
        (name "aragorn")
        (version "1.2.36")
        (source
         (origin
           (method url-fetch)
           (uri (string-append
                 "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                 version ".tgz"))
           (sha256
            (base32
             "1dg7jlz1qpqy88igjxd6ncs11ccsirb36qv1z01a0np4i4jh61mb"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f ; there are no tests
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (replace 'build
               (lambda _
                 ;; Compile "aragorn<version>.c" into the executable "aragorn".
                 ;; The phase succeeds only when gcc exits with status 0.
                 (let ((c-file (string-append "aragorn" ,version ".c"))
                       (cflags '("-O3" "-ffast-math" "-finline-functions")))
                   (zero? (apply system* "gcc"
                                 (append cflags
                                         (list "-o" "aragorn" c-file)))))))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((prefix (assoc-ref outputs "out"))
                        (bindir (string-append prefix "/bin"))
                        (mandir (string-append prefix "/share/man/man1")))
                   ;; Each entry pairs a file in the build directory with the
                   ;; directory it is copied into, keeping its name.
                   (for-each (lambda (entry)
                               (let ((file (car entry))
                                     (dir  (cdr entry)))
                                 (mkdir-p dir)
                                 (copy-file file
                                            (string-append dir "/" file))))
                             (list (cons "aragorn" bindir)
                                   (cons "aragorn.1" mandir)))
                   #t))))))
        (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
        (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
        (description
         "Aragorn identifies transfer RNA, mitochondrial RNA and
    transfer-messenger RNA from nucleotide sequences, based on homology to known
    tRNA consensus sequences and RNA structure.  It also outputs the secondary
    structure of the predicted RNA.")
        (license license:gpl2)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; BamTools, built with cmake-build-system.  A 'set-ldflags' phase is
    ;; inserted before 'configure'; tests are disabled because there is no
    ;; "check" target.
    (define-public bamtools
      (package
        (name "bamtools")
        (version "2.3.0")
        (source (origin
                  (method url-fetch)
                  (uri (string-append
                        "https://github.com/pezmaster31/bamtools/archive/v"
                        version ".tar.gz"))
                  (file-name (string-append name "-" version ".tar.gz"))
                  (sha256
                   (base32
                    "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
        (build-system cmake-build-system)
        (arguments
         `(#:tests? #f ;no "check" target
           #:phases
           (modify-phases %standard-phases
             (add-before
              'configure 'set-ldflags
              (lambda* (#:key outputs #:allow-other-keys)
                ;; Pass the output's "lib/bamtools" directory to the linker as
                ;; an rpath.  Presumably the shared libraries are installed
                ;; there -- confirm against the upstream CMake files.
                (setenv "LDFLAGS"
                        (string-append
                         "-Wl,-rpath="
                         (assoc-ref outputs "out") "/lib/bamtools"))
                ;; 'setenv' has no meaningful return value; report success
                ;; explicitly, as the other phases in this file do.
                #t)))))
        (inputs `(("zlib" ,zlib)))
        (home-page "https://github.com/pezmaster31/bamtools")
        (synopsis "C++ API and command-line toolkit for working with BAM data")
        (description
         "BamTools provides both a C++ API and a command-line toolkit for handling
    BAM files.")
        (license license:expat)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; BEDOPS, built with gnu-build-system without a 'configure' phase.  An
    ;; extra 'unpack-tarballs' phase extracts the bundled third-party libraries
    ;; and patches the Makefiles so that they are not unpacked again.
    (define-public bedops
      (package
        (name "bedops")
        (version "2.4.14")
        (source (origin
                  (method url-fetch)
                  (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                      version ".tar.gz"))
                  (file-name (string-append name "-" version ".tar.gz"))
                  (sha256
                   (base32
                    "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
        (build-system gnu-build-system)
        (arguments
         '(#:tests? #f
           #:make-flags (list (string-append "BINDIR=" %output "/bin"))
           #:phases
           (alist-cons-after
             'unpack 'unpack-tarballs
             (lambda _
               ;; FIXME: Bedops includes tarballs of minimally patched upstream
               ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
               ;; libraries because at least one of the libraries (zlib) is
               ;; patched to add a C++ function definition (deflateInit2cpp).
               ;; Until the Bedops developers offer a way to link against system
               ;; libraries we have to build the in-tree copies of these three
               ;; libraries.

               ;; See upstream discussion:
               ;; https://github.com/bedops/bedops/issues/124

               ;; The phase succeeds only if all three extractions succeed.
               ;; Previously the result of the inner 'and' was discarded, so
               ;; a failing "tar" invocation went unnoticed.
               (and
                ;; Unpack the tarballs to benefit from shebang patching.
                (with-directory-excursion "third-party"
                  (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                       (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                       (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                (begin
                  ;; Disable unpacking of tarballs in Makefile.
                  (substitute* "system.mk/Makefile.linux"
                    (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                    (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                  (substitute* "third-party/zlib-1.2.7/Makefile.in"
                    (("^SHELL=.*$") "SHELL=bash\n"))
                  #t)))
             (alist-delete 'configure %standard-phases))))
        (home-page "https://github.com/bedops/bedops")
        (synopsis "Tools for high-performance genomic feature operations")
        (description
         "BEDOPS is a suite of tools to address common questions raised in genomic
    studies---mostly with regard to overlap and proximity relationships between
    data sets.  It aims to be scalable and flexible, facilitating the efficient
    and accurate analysis and management of large-scale genomic data.
    
    BEDOPS provides tools that perform highly efficient and scalable Boolean and
    other set operations, statistical calculations, archiving, conversion and
    other management of genomic data of arbitrary scale.  Tasks can be easily
    split by chromosome for distributing whole-genome analyses across a
    computational cluster.")
        (license license:gpl2+)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; bedtools, built with gnu-build-system.  The 'configure' phase is removed,
    ;; the Makefile's SHELL definition is patched after unpacking, and the
    ;; 'install' phase is replaced by a plain copy of the files under "bin".
    (define-public bedtools
      (package
        (name "bedtools")
        (version "2.24.0")
        (source (origin
                  (method url-fetch)
                  (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                                      version ".tar.gz"))
                  (file-name (string-append name "-" version ".tar.gz"))
                  (sha256
                   (base32
                    "0lnxrjvs3nnmb4bmskag1wg3h2hd80przz5q3xd0bvs7vyxrvpbl"))
                  (patches (list (search-patch "bedtools-32bit-compilation.patch")))))
        (build-system gnu-build-system)
        (native-inputs `(("python" ,python-2)))
        (inputs `(("samtools" ,samtools)
                  ("zlib" ,zlib)))
        (arguments
         '(#:test-target "test"
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-after 'unpack 'patch-makefile-SHELL-definition
               (lambda _
                 ;; patch-makefile-SHELL cannot be used here as it does not
                 ;; yet patch definitions with `:='.  Since changes to
                 ;; patch-makefile-SHELL result in a full rebuild, features
                 ;; of patch-makefile-SHELL are reimplemented here.
                 (substitute* "Makefile"
                   (("^SHELL := .*$")
                    (string-append "SHELL := " (which "bash") " -e \n")))
                 ;; Report success explicitly instead of relying on the
                 ;; unspecified result of 'substitute*'.
                 #t))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 ;; Copy every file found under the build tree's "bin"
                 ;; directory into the output's "bin" directory.
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                   (mkdir-p bin)
                   (for-each (lambda (file)
                               (copy-file file
                                          (string-append bin (basename file))))
                             (find-files "bin" ".*"))
                   #t))))))
        (home-page "https://github.com/arq5x/bedtools2")
        (synopsis "Tools for genome analysis and arithmetic")
        (description
         "Collectively, the bedtools utilities are a swiss-army knife of tools for
    a wide-range of genomics analysis tasks.  The most widely-used tools enable
    genome arithmetic: that is, set theory on the genome.  For example, bedtools
    allows one to intersect, merge, count, complement, and shuffle genomic
    intervals from multiple files in widely-used genomic file formats such as BAM,
    BED, GFF/GTF, VCF.")
        (license license:gpl2)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; pybedtools, built with python-build-system against Python 2.  bedtools
    ;; and samtools are propagated inputs.
    (define-public python2-pybedtools
      (package
        (name "python2-pybedtools")
        (version "0.6.9")
        (source (origin
                  (method url-fetch)
                  (uri (string-append
                        "https://pypi.python.org/packages/source/p/pybedtools/pybedtools-"
                        version ".tar.gz"))
                  (sha256
                   (base32
                    "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
        (build-system python-build-system)
        (arguments `(#:python ,python-2)) ; no Python 3 support
        (inputs
         `(("python-cython" ,python2-cython)
           ("python-matplotlib" ,python2-matplotlib)))
        (propagated-inputs
         `(("bedtools" ,bedtools)
           ("samtools" ,samtools)))
        (native-inputs
         `(("python-pyyaml" ,python2-pyyaml)
           ("python-nose" ,python2-nose)
           ("python-setuptools" ,python2-setuptools)))
        (home-page "https://pythonhosted.org/pybedtools/")
        (synopsis "Python wrapper for BEDtools programs")
        (description
         "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
    which are widely used for genomic interval manipulation or \"genome algebra\".
    pybedtools extends BEDTools by offering feature-level manipulations from within
    Python.")
        (license license:gpl2+)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; BioPerl, built with perl-build-system.  After installation, every file
    ;; under the output's "bin" directory whose name ends in ".pl" is wrapped so
    ;; that PERL5LIB is prefixed with the package's own site_perl directory and
    ;; with the %build-inputs entries of the transitive inputs computed below.
    (define-public bioperl-minimal
      ;; INPUTS is bound here, outside the package form, because it is used both
      ;; as the value of the 'inputs' field and to compute TRANSITIVE-INPUTS.
      (let* ((inputs `(("perl-module-build" ,perl-module-build)
                       ("perl-data-stag" ,perl-data-stag)
                       ("perl-libwww" ,perl-libwww)
                       ("perl-uri" ,perl-uri)))
             ;; Package names of everything 'package-transitive-target-inputs'
             ;; returns for each direct input; duplicates are removed from the
             ;; concatenated input lists before the names are extracted.  This
             ;; is evaluated on the host side and spliced into the build-side
             ;; phase below through ',transitive-inputs.
             (transitive-inputs
              (map (compose package-name cadr)
                   (delete-duplicates
                    (concatenate
                     (map (compose package-transitive-target-inputs cadr) inputs))))))
        (package
          (name "bioperl-minimal")
          (version "1.6.924")
          (source
           (origin
             (method url-fetch)
             (uri (string-append "mirror://cpan/authors/id/C/CJ/CJFIELDS/BioPerl-"
                                 version ".tar.gz"))
             (sha256
              (base32
               "1l3npcvvvwjlhkna9dndpfv1hklhrgva013kw96m0n1wpd37ask1"))))
          (build-system perl-build-system)
          (arguments
           `(#:phases
             (modify-phases %standard-phases
               (add-after
                'install 'wrap-programs
                (lambda* (#:key outputs #:allow-other-keys)
                  ;; Make sure all executables in "bin" find the required Perl
                  ;; modules at runtime.  As the PERL5LIB variable contains also
                  ;; the paths of native inputs, we pick the transitive target
                  ;; inputs from %build-inputs.
                  ;;
                  ;; NOTE(review): the names are looked up in %build-inputs with
                  ;; 'assoc-ref'; presumably that alist is keyed by input label,
                  ;; so this relies on labels matching package names -- confirm.
                  ;; NOTE(review): the looked-up entries are joined into PATH
                  ;; as-is, without a "lib/perl5/site_perl" suffix -- confirm
                  ;; that this is what the wrapped scripts need.
                  (let* ((out  (assoc-ref outputs "out"))
                         (bin  (string-append out "/bin/"))
                         (path (string-join
                                (cons (string-append out "/lib/perl5/site_perl")
                                      (map (lambda (name)
                                             (assoc-ref %build-inputs name))
                                           ',transitive-inputs))
                                ":")))
                    ;; Only files whose name ends in ".pl" are wrapped.
                    (for-each (lambda (file)
                                (wrap-program file
                                  `("PERL5LIB" ":" prefix (,path))))
                              (find-files bin "\\.pl$"))
                    #t))))))
          (inputs inputs)
          (native-inputs
           `(("perl-test-most" ,perl-test-most)))
          (home-page "http://search.cpan.org/dist/BioPerl")
          (synopsis "Bioinformatics toolkit")
          (description
           "BioPerl is the product of a community effort to produce Perl code which
    is useful in biology.  Examples include Sequence objects, Alignment objects
    and database searching objects.  These objects not only do what they are
    advertised to do in the documentation, but they also interact - Alignment
    objects are made from the Sequence objects, Sequence objects have access to
    Annotation and SeqFeature objects and databases, Blast objects can be
    converted to Alignment objects, and so on.  This means that the objects
    provide a coordinated and extensible framework to do computational biology.")
          ;; The license is taken from the 'perl' package.
          (license (package-license perl)))))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; Biopython, built with python-build-system and its default Python.
    ;; python2-biopython is derived from this definition with
    ;; 'package-with-python2'.
    (define-public python-biopython
      (package
        (name "python-biopython")
        (version "1.65")
        (source (origin
                  (method url-fetch)
                  (uri (string-append
                        "http://biopython.org/DIST/biopython-"
                        version ".tar.gz"))
                  (sha256
                   (base32
                    "13m8s9jkrw40zvdp1rl709n6lmgdh4f52aann7gzr6sfp0fwhg26"))))
        (build-system python-build-system)
        (inputs
         `(("python-numpy" ,python-numpy)))
        (native-inputs
         ;; Use the setuptools variant that matches this package's Python.
         ;; This used to name python2-setuptools, which does not match the
         ;; "python-" inputs used here; 'package-with-python2' is expected to
         ;; map this input to its Python 2 counterpart for python2-biopython.
         `(("python-setuptools" ,python-setuptools)))
        (home-page "http://biopython.org/")
        (synopsis "Tools for biological computation in Python")
        (description
         "Biopython is a set of tools for biological computation including parsers
    for bioinformatics files into Python data structures; interfaces to common
    bioinformatics programs; a standard sequence class and tools for performing
    common operations on them; code to perform data classification; code for
    dealing with alignments; code making it easy to split up parallelizable tasks
    into separate processes; and more.")
        (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
    
    ;; Python 2 variant of python-biopython: the result of
    ;; 'package-with-python2' is inherited and its 'inputs' field is replaced
    ;; with python2-numpy.
    (define-public python2-biopython
      (let ((base (package-with-python2 python-biopython)))
        (package
          (inherit base)
          (inputs
           `(("python2-numpy" ,python2-numpy))))))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; NCBI BLAST+, built from the "c++" subdirectory of the source tarball.
    ;; The standard 'configure' phase is replaced by a direct call to the
    ;; "configure.orig" script, and the package is split into the three outputs
    ;; "out", "lib" and "include".
    (define-public blast+
      (package
        (name "blast+")
        (version "2.2.31")
        (source (origin
                  (method url-fetch)
                  (uri (string-append
                        "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
                        version "/ncbi-blast-" version "+-src.tar.gz"))
                  (sha256
                   (base32
                    "19gq6as4k1jrgsd26158ads6h7v4jca3h4r5dzg1y0m6ya50x5ph"))
                  (modules '((guix build utils)))
                  (snippet
                   '(begin
                      ;; Remove bundled bzip2 and zlib
                      (delete-file-recursively "c++/src/util/compress/bzip2")
                      (delete-file-recursively "c++/src/util/compress/zlib")
                      ;; Drop the "bzip2 zlib" entries from the Makefile text
                      ;; now that those directories are gone.
                      (substitute* "c++/src/util/compress/Makefile.in"
                        (("bzip2 zlib api") "api"))
                      ;; Remove useless msbuild directory
                      (delete-file-recursively
                       "c++/src/build-system/project_tree_builder/msbuild")
                      #t))))
        (build-system gnu-build-system)
        (arguments
         `(;; There are three(!) tests for this massive library, and all fail with
           ;; "unparsable timing stats".
           ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
           ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
           ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
           #:tests? #f
           #:out-of-source? #t
           #:parallel-build? #f ; not supported
           #:phases
           (modify-phases %standard-phases
             (add-before
              'configure 'set-HOME
              ;; $HOME needs to be set at some point during the configure phase
              (lambda _ (setenv "HOME" "/tmp") #t))
             ;; All later phases run from inside the "c++" subdirectory.
             (add-after
              'unpack 'enter-dir
              (lambda _ (chdir "c++") #t))
             (add-after
              'enter-dir 'fix-build-system
              (lambda _
                ;; Return the replacement text for the command name CMD: the
                ;; fixed invocation "date -d @0" for "date", whatever 'which'
                ;; finds for any other command, or #f (after printing a
                ;; warning) when CMD cannot be found.
                (define (which* cmd)
                  (cond ((string=? cmd "date")
                         ;; make call to "date" deterministic
                         "date -d @0")
                        ((which cmd)
                         => identity)
                        (else
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f)))
    
                ;; Rewrite hardcoded paths to various tools
                ;; In the pattern below, ALL is the whole match, DIR the
                ;; "/usr/bin/" or "/bin/" prefix and CMD the command name.  When
                ;; which* returns #f the matched text is kept unchanged.
                (substitute* (append '("src/build-system/configure.ac"
                                       "src/build-system/configure"
                                       "scripts/common/impl/if_diff.sh"
                                       "scripts/common/impl/run_with_lock.sh"
                                       "src/build-system/Makefile.configurables.real"
                                       "src/build-system/Makefile.in.top"
                                       "src/build-system/Makefile.meta.gmake=no"
                                       "src/build-system/Makefile.meta.in"
                                       "src/build-system/Makefile.meta_l"
                                       "src/build-system/Makefile.meta_p"
                                       "src/build-system/Makefile.meta_r"
                                       "src/build-system/Makefile.mk.in"
                                       "src/build-system/Makefile.requirements"
                                       "src/build-system/Makefile.rules_with_autodep.in")
                                     (find-files "scripts/common/check" "\\.sh$"))
                  (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                   (or (which* cmd) all)))
    
                ;; In the configure-related files, point LN_S at the 'ln' found
                ;; by 'which' and delete lines that assign PATH.
                (substitute* (find-files "src/build-system" "^config.*")
                  (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
                  (("^PATH=.*") ""))
    
                ;; rewrite "/var/tmp" in check script
                (substitute* "scripts/common/check/check_make_unix.sh"
                  (("/var/tmp") "/tmp"))
    
                ;; do not reset PATH
                (substitute* (find-files "scripts/common/impl/" "\\.sh$")
                  (("^ *PATH=.*") "")
                  (("action=/bin/") "action=")
                  (("export PATH") ":"))
                #t))
             (replace
              'configure
              (lambda* (#:key inputs outputs #:allow-other-keys)
                ;; Libraries go to the "lib" output and headers to the
                ;; "include" output; everything else is installed under "out".
                (let ((out     (assoc-ref outputs "out"))
                      (lib     (string-append (assoc-ref outputs "lib") "/lib"))
                      (include (string-append (assoc-ref outputs "include")
                                              "/include/ncbi-tools++")))
                  ;; The 'configure' script doesn't recognize things like
                  ;; '--enable-fast-install'.
                  (zero? (system* "./configure.orig"
                                  (string-append "--with-build-root=" (getcwd) "/build")
                                  (string-append "--prefix=" out)
                                  (string-append "--libdir=" lib)
                                  (string-append "--includedir=" include)
                                  (string-append "--with-bz2="
                                                 (assoc-ref inputs "bzip2"))
                                  (string-append "--with-z="
                                                 (assoc-ref inputs "zlib"))
                                  ;; Each library is built twice by default, once
                                  ;; with "-static" in its name, and again
                                  ;; without.
                                  "--without-static"
                                  "--with-dll"))))))))
        (outputs '("out"       ;  19 MB
                   "lib"       ; 203 MB
                   "include")) ;  32 MB
        (inputs
         `(("bzip2" ,bzip2)
           ("zlib" ,zlib)))
        ;; NOTE(review): cpio is not referenced by any phase visible here;
        ;; presumably the upstream build scripts invoke it -- confirm.
        (native-inputs
         `(("cpio" ,cpio)))
        (home-page "http://blast.ncbi.nlm.nih.gov")
        (synopsis "Basic local alignment search tool")
        (description
         "BLAST is a popular method of performing a DNA or protein sequence
    similarity search, using heuristics to produce results quickly.  It also
    calculates an “expect value” that estimates how many matches would have
    occurred at a given score by chance, which can aid a user in judging how much
    confidence to have in an alignment.")
        ;; Most of the sources are in the public domain, with the following
        ;; exceptions:
        ;;   * Expat:
        ;;     * ./c++/include/util/bitset/
        ;;     * ./c++/src/html/ncbi_menu*.js
        ;;   * Boost license:
        ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
        ;;   * LGPL 2+:
        ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
        ;;   * ASL 2.0:
        ;;     * ./c++/src/corelib/teamcity_*
        (license (list license:public-domain
                       license:expat
                       license:boost1.0
                       license:lgpl2.0+
                       license:asl2.0))))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; Bowtie 2, built with gnu-build-system using the "allall" make target.
    ;; There is no 'configure' phase; 'check' runs the upstream Perl test
    ;; script and 'install' copies the built "bowtie2*" files into "bin".
    (define-public bowtie
      (package
        (name "bowtie")
        (version "2.2.4")
        (source (origin
                  (method url-fetch)
                  (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                      version ".tar.gz"))
                  (file-name (string-append name "-" version ".tar.gz"))
                  (sha256
                   (base32
                    "15dnbqippwvhyh9zqjhaxkabk7lm1xbh1nvar1x4b5kwm117zijn"))
                  (modules '((guix build utils)))
                  (snippet
                   '(substitute* "Makefile"
                      (("^CC = .*$") "CC = gcc")
                      (("^CPP = .*$") "CPP = g++")
                      ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                      (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                      (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
                  (patches (list (search-patch "bowtie-fix-makefile.patch")))))
        (build-system gnu-build-system)
        (inputs `(("perl" ,perl)
                  ("perl-clone" ,perl-clone)
                  ("perl-test-deep" ,perl-test-deep)
                  ("perl-test-simple" ,perl-test-simple)
                  ("python" ,python-2)))
        (arguments
         '(#:make-flags '("allall")
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (replace 'check
               (lambda _
                 ;; 'system*' returns the exit status, and every integer
                 ;; (including a non-zero status) counts as true in Scheme.
                 ;; Compare against zero so that failing tests fail the phase.
                 (zero? (system* "perl"
                                 "scripts/test/simple_tests.pl"
                                 "--bowtie2=./bowtie2"
                                 "--bowtie2-build=./bowtie2-build"))))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                   (mkdir-p bin)
                   ;; 'find-files' returns names relative to "." (for example
                   ;; "./bowtie2"); use the base name so every file lands
                   ;; directly in BIN.
                   (for-each (lambda (file)
                               (copy-file file
                                          (string-append bin (basename file))))
                             (find-files "." "bowtie2.*"))
                   #t))))))
        (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
        (synopsis "Fast and sensitive nucleotide sequence read aligner")
        (description
         "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
    reads to long reference sequences.  It is particularly good at aligning reads
    of about 50 up to 100s or 1,000s of characters, and particularly good at
    aligning to relatively long (e.g. mammalian) genomes.  Bowtie 2 indexes the
    genome with an FM Index to keep its memory footprint small: for the human
    genome, its memory footprint is typically around 3.2 GB.  Bowtie 2 supports
    gapped, local, and paired-end alignment modes.")
        (supported-systems '("x86_64-linux"))
        (license license:gpl3+)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; BWA, built with gnu-build-system.  There is no "configure" script and no
    ;; "check" target; the 'install' phase copies the executable, the README
    ;; and the man page into the output by hand.
    (define-public bwa
      (package
        (name "bwa")
        (version "0.7.12")
        (source (origin
                  (method url-fetch)
                  (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                      version ".tar.bz2"))
                  (sha256
                   (base32
                    "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
        (build-system gnu-build-system)
        (arguments
         '(#:tests? #f ;no "check" target
           #:phases
           (modify-phases %standard-phases
             ;; no "configure" script
             (delete 'configure)
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin"))
                        (doc (string-append out "/share/doc/bwa"))
                        (man (string-append out "/share/man/man1")))
                   (mkdir-p bin)
                   (mkdir-p doc)
                   (mkdir-p man)
                   (copy-file "bwa" (string-append bin "/bwa"))
                   (copy-file "README.md" (string-append doc "/README.md"))
                   (copy-file "bwa.1" (string-append man "/bwa.1"))
                   ;; Report success explicitly instead of relying on the
                   ;; unspecified result of 'copy-file'.
                   #t))))))
        (inputs `(("zlib" ,zlib)))
        (home-page "http://bio-bwa.sourceforge.net/")
        (synopsis "Burrows-Wheeler sequence aligner")
        (description
         "BWA is a software package for mapping low-divergent sequences against a
    large reference genome, such as the human genome.  It consists of three
    algorithms: BWA-backtrack, BWA-SW and BWA-MEM.  The first algorithm is
    designed for Illumina sequence reads up to 100bp, while the rest two for
    longer sequences ranged from 70bp to 1Mbp.  BWA-MEM and BWA-SW share similar
    features such as long-read support and split alignment, but BWA-MEM, which is
    the latest, is generally recommended for high-quality queries as it is faster
    and more accurate.  BWA-MEM also has better performance than BWA-backtrack for
    70-100bp Illumina reads.")
        (license license:gpl3+)))
    
    
    ;; Python 2 build of bx-python from the PyPI source tarball.  "setup.py"
    ;; is patched by the origin snippet and the test suite is disabled.
    (define-public python2-bx-python
      (package
        (name "python2-bx-python")
        (version "0.7.2")
        (source
         (origin
           (method url-fetch)
           (uri
            (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
           (sha256
            (base32 "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
           (modules '((guix build utils)))
           ;; remove dependency on outdated "distribute" module
           (snippet
            '(substitute* "setup.py"
               (("^from distribute_setup import use_setuptools") "")
               (("^use_setuptools\\(\\)") "")))))
        (build-system python-build-system)
        (arguments
         `(#:python ,python-2
           ;; tests fail because test data are not included
           #:tests? #f))
        (native-inputs
         `(("python-nose" ,python2-nose)
           ("python-setuptools" ,python2-setuptools)))
        (inputs
         `(("python-numpy" ,python2-numpy)
           ("zlib" ,zlib)))
        (home-page "http://bitbucket.org/james_taylor/bx-python/")
        (synopsis "Tools for manipulating biological data")
        (description
         "bx-python provides tools for manipulating biological data, particularly
    multiple sequence alignments.")
        (license license:expat)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; CLIPper, fetched as a GitHub release archive and built with the Python
    ;; build system against Python 2.  The origin snippet strips the
    ;; "setup_requires" line from "setup.py".
    (define-public clipper
      (package
        (name "clipper")
        (version "0.3.0")
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/YeoLab/clipper/archive/"
                               version ".tar.gz"))
           (sha256
            (base32 "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
           (modules '((guix build utils)))
           (snippet
            ;; remove unnecessary setup dependency
            '(substitute* "setup.py"
               (("setup_requires = .*") "")))))
        (build-system python-build-system)
        ;; only Python 2 is supported
        (arguments
         `(#:python ,python-2))
        (native-inputs
         `(("python-mock" ,python2-mock)    ; for tests
           ("python-pytz" ,python2-pytz)    ; for tests
           ("python-setuptools" ,python2-setuptools)))
        (inputs
         `(("htseq" ,htseq)
           ("python-pybedtools" ,python2-pybedtools)
           ("python-cython" ,python2-cython)
           ("python-scikit-learn" ,python2-scikit-learn)
           ("python-matplotlib" ,python2-matplotlib)
           ("python-pysam" ,python2-pysam)
           ("python-numpy" ,python2-numpy)
           ("python-scipy" ,python2-scipy)))
        (home-page "https://github.com/YeoLab/clipper")
        (synopsis "CLIP peak enrichment recognition")
        (description
         "CLIPper is a tool to define peaks in CLIP-seq datasets.")
        (license license:gpl2)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; COUGER, distributed as a zip archive.  Nothing is configured or
    ;; compiled: the "src" tree and the "couger" launcher are copied into the
    ;; output, and the launcher is wrapped so that it sees the build-time
    ;; PYTHONPATH.
    (define-public couger
      (package
        (name "couger")
        (version "1.8.2")
        (source
         (origin
           (method url-fetch)
           (uri (string-append
                 "http://couger.oit.duke.edu/static/assets/COUGER"
                 version ".zip"))
           (sha256
            (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'build)
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((store-dir (assoc-ref outputs "out"))
                        (launcher  (string-append store-dir "/bin/couger")))
                   (copy-recursively "src" (string-append store-dir "/src"))
                   (mkdir (string-append store-dir "/bin"))
                   ;; Prepend Python code to the launcher that appends the
                   ;; output directory (the parent of "src") to sys.path.
                   (substitute* "couger"
                     (("from argparse")
                      (string-append "import sys\nsys.path.append(\""
                                     store-dir "\")\nfrom argparse")))
                   (copy-file "couger" launcher))
                 #t))
             (add-after 'install 'wrap-program
               (lambda* (#:key inputs outputs #:allow-other-keys)
                 ;; Make sure 'couger' runs with the correct PYTHONPATH.
                 (let ((launcher    (string-append (assoc-ref outputs "out")
                                                   "/bin/couger"))
                       (python-path (getenv "PYTHONPATH")))
                   (wrap-program launcher
                     `("PYTHONPATH" ":" prefix (,python-path))))
                 #t)))))
        (native-inputs
         `(("unzip" ,unzip)))
        (inputs
         `(("python" ,python-2)
           ("python2-pillow" ,python2-pillow)
           ("python2-numpy" ,python2-numpy)
           ("python2-scipy" ,python2-scipy)
           ("python2-matplotlib" ,python2-matplotlib)))
        (propagated-inputs
         `(("r" ,r)
           ("libsvm" ,libsvm)
           ("randomjungle" ,randomjungle)))
        (home-page "http://couger.oit.duke.edu")
        (synopsis "Identify co-factors in sets of genomic regions")
        (description
         "COUGER can be applied to any two sets of genomic regions bound by
    paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
    putative co-factors that provide specificity to each TF.  The framework
    determines the genomic targets uniquely-bound by each TF, and identifies a
    small set of co-factors that best explain the in vivo binding differences
    between the two TFs.
    
    COUGER uses classification algorithms (support vector machines and random
    forests) with features that reflect the DNA binding specificities of putative
    co-factors.  The features are generated either from high-throughput TF-DNA
    binding data (from protein binding microarray experiments), or from large
    collections of DNA motifs.")
        (license license:gpl3+)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; Clustal Omega, fetched from clustal.org and built with the GNU build
    ;; system using its default arguments; argtable is the only declared
    ;; input.
    (define-public clustal-omega
      (package
        (name "clustal-omega")
        (version "1.2.1")
        (source
         (origin
           (method url-fetch)
           (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                               version ".tar.gz"))
           (sha256
            (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
        (build-system gnu-build-system)
        (inputs `(("argtable" ,argtable)))
        (home-page "http://www.clustal.org/omega/")
        (synopsis "Multiple sequence aligner for protein and DNA/RNA")
        (description
         "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
    program for protein and DNA/RNA.  It produces high quality MSAs and is capable
    of handling data-sets of hundreds of thousands of sequences in reasonable
    time.")
        (license license:gpl2+)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; CrossMap, built for Python 2 from the SourceForge tarball.  The bundled
    ;; pysam copy is deleted from the source and the build is told, through an
    ;; environment variable set right after unpacking, to use the pysam
    ;; provided as an input.
    (define-public crossmap
      (package
        (name "crossmap")
        (version "0.1.6")
        (source (origin
                  (method url-fetch)
                  (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                      version ".tar.gz"))
                  (sha256
                   (base32
                    "163hi5gjgij6cndxlvbkp5jjwr0k4wbm9im6d2210278q7k9kpnp"))
                  ;; patch has been sent upstream already
                  (patches (list
                            (search-patch "crossmap-allow-system-pysam.patch")))
                  (modules '((guix build utils)))
                  ;; remove bundled copy of pysam
                  (snippet
                   '(delete-file-recursively "lib/pysam"))))
        (build-system python-build-system)
        (arguments
         `(#:python ,python-2
           #:phases
           (modify-phases %standard-phases
             (add-after 'unpack 'set-env
               (lambda _
                 (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
                 ;; Signal success explicitly instead of relying on the
                 ;; unspecified return value of 'setenv'.
                 #t)))))
        (inputs
         `(("python-numpy" ,python2-numpy)
           ("python-pysam" ,python2-pysam)
           ("zlib" ,zlib)))
        (native-inputs
         `(("python-cython" ,python2-cython)
           ("python-nose" ,python2-nose)
           ("python-setuptools" ,python2-setuptools)))
        (home-page "http://crossmap.sourceforge.net/")
        (synopsis "Convert genome coordinates between assemblies")
        (description
         "CrossMap is a program for conversion of genome coordinates or annotation
    files between different genome assemblies.  It supports most commonly used
    file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
        (license license:gpl2+)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; Cutadapt, built from the GitHub release archive.  The standard 'check'
    ;; phase is removed and a new 'check' phase is added after 'install', with
    ;; the freshly installed site-packages directory appended to PYTHONPATH.
    (define-public cutadapt
      (package
        (name "cutadapt")
        (version "1.8")
        (source (origin
                  (method url-fetch)
                  (uri (string-append
                        "https://github.com/marcelm/cutadapt/archive/v"
                        version ".tar.gz"))
                  (file-name (string-append name "-" version ".tar.gz"))
                  (sha256
                   (base32
                    "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
        (build-system python-build-system)
        (arguments
         `(#:phases
           (modify-phases %standard-phases
             ;; tests must be run after install
             (delete 'check)
             (add-after 'install 'check
               (lambda* (#:key inputs outputs #:allow-other-keys)
                 ;; Derive "MAJOR.MINOR" from the text after the last "-" in
                 ;; the "python" input's file name, by locating the dots
                 ;; rather than assuming a five-character "X.Y.Z" suffix, so
                 ;; multi-digit version components are handled as well.
                 (let* ((python  (assoc-ref inputs "python"))
                        (version (substring python
                                            (+ 1 (string-rindex python #\-))))
                        (dot1    (string-index version #\.))
                        (dot2    (and dot1
                                      (string-index version #\. (+ 1 dot1))))
                        ;; With fewer than two dots the whole version string
                        ;; is used as is.
                        (major+minor (if dot2
                                         (substring version 0 dot2)
                                         version)))
                   (setenv "PYTHONPATH"
                           (string-append
                            (getenv "PYTHONPATH")
                            ":" (assoc-ref outputs "out")
                            "/lib/python" major+minor
                            "/site-packages"))
                   ;; The phase succeeds only if nosetests exits with 0.
                   (zero? (system* "nosetests" "-P" "tests"))))))))
        (native-inputs
         `(("python-cython" ,python-cython)
           ("python-nose" ,python-nose)
           ("python-setuptools" ,python-setuptools)))
        (home-page "https://code.google.com/p/cutadapt/")
        (synopsis "Remove adapter sequences from nucleotide sequencing reads")
        (description
         "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
    other types of unwanted sequence from high-throughput sequencing reads.")
        (license license:expat)))
    
    
    Ben Woodcroft's avatar
    Ben Woodcroft committed
    ;; DIAMOND, built from the GitHub release archive.  The build runs from the
    ;; "src" sub-directory without a configure step, and the resulting
    ;; "bin/diamond" is copied into the output by hand.
    (define-public diamond
      (package
        (name "diamond")
        (version "0.7.9")
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                               version ".tar.gz"))
           (file-name (string-append name "-" version ".tar.gz"))
           (sha256
            (base32 "0hfkcfv9f76h5brbyw9fyvmc0l9cmbsxrcdqk0fa9xv82zj47p15"))
           ;; Drop the "bin/diamond" file shipped in the archive
           ;; (presumably a pre-built binary -- TODO confirm).
           (snippet
            '(begin
               (delete-file "bin/diamond")
               #t))))
        (build-system gnu-build-system)
        (arguments
         '(#:tests? #f                      ;no "check" target
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-after 'unpack 'enter-source-dir
               (lambda _
                 (chdir "src")
                 #t))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out         (assoc-ref outputs "out"))
                        (destination (string-append out "/bin")))
                   (mkdir-p destination)
                   ;; The working directory is "src", hence the "../".
                   (copy-file "../bin/diamond"
                              (string-append destination "/diamond"))
                   #t))))))
        (native-inputs `(("bc" ,bc)))
        (inputs
         `(("boost" ,boost)
           ("zlib" ,zlib)))
        (home-page "https://github.com/bbuchfink/diamond")
        (synopsis "Accelerated BLAST compatible local sequence aligner")
        (description
         "DIAMOND is a BLAST-compatible local aligner for mapping protein and
    translated DNA query sequences against a protein reference database (BLASTP
    and BLASTX alignment mode).  The speedup over BLAST is up to 20,000 on short
    reads at a typical sensitivity of 90-99% relative to BLAST depending on the
    data and settings.")
        (license (license:non-copyleft "file://src/COPYING"
                                       "See src/COPYING in the distribution."))))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    ;; Entrez Direct, fetched as an unversioned zip archive from the NCBI FTP
    ;; server.  Nothing is configured or built: "edirect.pl" is copied into
    ;; the output's "bin" directory and wrapped so that it sees the build-time
    ;; PERL5LIB.
    (define-public edirect
      (package
        (name "edirect")
        (version "2.50")
        (source (origin
                  (method url-fetch)
                  ;; Note: older versions are not retained.
                  (uri "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.zip")
                  (sha256
                   (base32
                    "08afhz2ph66h8h381hl1mqyxkdi5nbvzsyj9gfw3jfbdijnpi4qj"))))
        (build-system perl-build-system)
        (arguments
         `(#:tests? #f ;no "check" target
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'build)
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((target (string-append (assoc-ref outputs "out")
                                              "/bin")))
                   (mkdir-p target)
                   (copy-file "edirect.pl"
                              (string-append target "/edirect.pl"))
                   #t)))
             (add-after 'install 'wrap-program
               (lambda* (#:key outputs #:allow-other-keys)
                 ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
                 (let ((script (string-append (assoc-ref outputs "out")
                                              "/bin/edirect.pl"))
                       (path   (getenv "PERL5LIB")))
                   (wrap-program script
                     `("PERL5LIB" ":" prefix (,path))))
                 ;; Signal success explicitly instead of relying on the
                 ;; return value of 'wrap-program'.
                 #t)))))
        (inputs
         `(("perl-html-parser" ,perl-html-parser)
           ("perl-encode-locale" ,perl-encode-locale)
           ("perl-file-listing" ,perl-file-listing)
           ("perl-html-tagset" ,perl-html-tagset)
           ("perl-html-tree" ,perl-html-tree)
           ("perl-http-cookies" ,perl-http-cookies)
           ("perl-http-date" ,perl-http-date)
           ("perl-http-message" ,perl-http-message)
           ("perl-http-negotiate" ,perl-http-negotiate)
           ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
           ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
           ("perl-net-http" ,perl-net-http)
           ("perl-uri" ,perl-uri)
           ("perl-www-robotrules" ,perl-www-robotrules)
           ("perl" ,perl)))
        (native-inputs
         `(("unzip" ,unzip)))
        (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288")
        (synopsis "Tools for accessing the NCBI's set of databases")
        (description
         "Entrez Direct (EDirect) is a method for accessing the National Center
    for Biotechnology Information's (NCBI) set of interconnected
    databases (publication, sequence, structure, gene, variation, expression,
    etc.) from a terminal.  Functions take search terms from command-line
    arguments.  Individual operations are combined to build multi-step queries.
    Record retrieval and formatting normally complete the process.
    
    EDirect also provides an argument-driven function that simplifies the
    extraction of data from document summaries or other results that are returned
    in structured XML format.  This can eliminate the need for writing custom
    software to answer ad hoc questions.")
        (license license:public-domain)))
    
    
    Ricardo Wurmus's avatar
    Ricardo Wurmus committed
    (define-public express
      (package
        (name "express")
        (version "1.5.1")
        (source (origin