From f6b1cc61cacf8d462a563b885065902a78d72fa7 Mon Sep 17 00:00:00 2001 From: Petar Petrov Date: Mon, 24 Dec 2018 09:09:59 +0700 Subject: academic/clark-ugene: Added (supervised sequence classification). Signed-off-by: Willy Sudiarto Raharjo --- academic/clark-ugene/README | 39 +++++++++++ academic/clark-ugene/clark-ugene.SlackBuild | 103 ++++++++++++++++++++++++++++ academic/clark-ugene/clark-ugene.info | 10 +++ academic/clark-ugene/slack-desc | 19 +++++ 4 files changed, 171 insertions(+) create mode 100644 academic/clark-ugene/README create mode 100644 academic/clark-ugene/clark-ugene.SlackBuild create mode 100644 academic/clark-ugene/clark-ugene.info create mode 100644 academic/clark-ugene/slack-desc (limited to 'academic') diff --git a/academic/clark-ugene/README b/academic/clark-ugene/README new file mode 100644 index 0000000000..4e9386f2ff --- /dev/null +++ b/academic/clark-ugene/README @@ -0,0 +1,39 @@ +This is Ugene's (http://ugene.net/) fork of the CLARK tool +(http://clark.cs.ucr.edu/Tool/), with support for building DB directly from +gzip & 7z packed RefSeq files. + +CLARK: CLAssifier based on Reduced K-mers + +The problem of DNA sequence classification is central to several +application domains in molecular biology, genomics, metagenomics and +genetics. The problem is computationally challenging due to the size of +datasets generated by modern sequencing instruments and the growing size +of reference sequence databases. + +CLARK is a novel method for supervised sequence classification based on +discriminative k-mers. Somewhat unique among other metagenomic and +genomic classification methods, CLARK provides a confidence score for +its assignments which can be used in downstream analysis. 
The utility of +CLARK is demonstrated on two distinct specific classification problems: + +1) the assignment of metagenomic reads to known bacterial genomes +2) the assignment of BAC clones and transcripts to chromosome arms (in + the absence of a finished assembly for the reference genome). + +Three classifiers or variants in the CLARK framework are provided: +CLARK (default): created for powerful workstations, it may require a +significant amount of RAM to run with a large database (e.g., all +bacterial genomes from NCBI/RefSeq). This classifier queries k-mers +with exact matching. + +CLARK-l (light): created for workstations with limited memory, this +software tool provides precise classification on small metagenomes. +Indeed, for metagenomics analysis, CLARK-l works with a sparse or +"light" database (up to 4 GB of RAM) that is built using distant and +non-overlapping k-mers. This classifier queries k-mers with exact +matching. + +CLARK-S (spaced): created for powerful workstations exploiting spaced k- +mers, this classifier requires a higher RAM usage than CLARK or CLARK-l, +but it does offer a higher sensitivity. CLARK-S completes the CLARK +series of classifiers. diff --git a/academic/clark-ugene/clark-ugene.SlackBuild b/academic/clark-ugene/clark-ugene.SlackBuild new file mode 100644 index 0000000000..bd2a8e55c1 --- /dev/null +++ b/academic/clark-ugene/clark-ugene.SlackBuild @@ -0,0 +1,103 @@ +#!/bin/sh + +# Slackware build script for clark-ugene + +# Copyright 2018 Petar Petrov slackalaxy@gmail.com +# All rights reserved. +# +# Redistribution and use of this script, with or without modification, is +# permitted provided that the following conditions are met: +# +# 1. Redistributions of this script must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SRCNAM=clark +PRGNAM=${SRCNAM}-ugene +VERSION=${VERSION:-git_23801a2} +COMMIT=${COMMIT:-23801a2738b7c104c8a7a1402d50d94fb5b4036b} +BUILD=${BUILD:-1} +TAG=${TAG:-_SBo} + +if [ -z "$ARCH" ]; then + case "$( uname -m )" in + i?86) ARCH=i586 ;; + arm*) ARCH=arm ;; + *) ARCH=$( uname -m ) ;; + esac +fi + +CWD=$(pwd) +TMP=${TMP:-/tmp/SBo} +PKG=$TMP/package-$PRGNAM +OUTPUT=${OUTPUT:-/tmp} + +if [ "$ARCH" = "i586" ]; then + SLKCFLAGS="-O2 -march=i586 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "i686" ]; then + SLKCFLAGS="-O2 -march=i686 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "x86_64" ]; then + SLKCFLAGS="-O2 -fPIC" + LIBDIRSUFFIX="64" +else + SLKCFLAGS="-O2" + LIBDIRSUFFIX="" +fi + +set -e + +rm -rf $PKG +mkdir -p $TMP $PKG $OUTPUT +cd $TMP +rm -rf $SRCNAM-$COMMIT +tar xvf $CWD/$SRCNAM-$COMMIT.tar.gz +cd $SRCNAM-$COMMIT +chown -R root:root . +find -L . 
\ + \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \ + -o -perm 511 \) -exec chmod 755 {} \; -o \ + \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \ + -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \; + +mkdir -p build +cd build + cmake \ + -DCMAKE_C_FLAGS:STRING="$SLKCFLAGS" \ + -DCMAKE_CXX_FLAGS:STRING="$SLKCFLAGS" \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_BUILD_TYPE=Release .. + make +cd .. + +cd bin +for i in * ; do + install -D -m755 $i $PKG/usr/bin/$i +done +cd .. + +install -D -m755 builddb.sh $PKG/usr/bin/builddb.sh + +find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" | grep ELF \ + | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true + +mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION +cp -a README.md $PKG/usr/doc/$PRGNAM-$VERSION +cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild + +mkdir -p $PKG/install +cat $CWD/slack-desc > $PKG/install/slack-desc + +cd $PKG +/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz} diff --git a/academic/clark-ugene/clark-ugene.info b/academic/clark-ugene/clark-ugene.info new file mode 100644 index 0000000000..3e139342eb --- /dev/null +++ b/academic/clark-ugene/clark-ugene.info @@ -0,0 +1,10 @@ +PRGNAM="clark-ugene" +VERSION="git_23801a2" +HOMEPAGE="https://github.com/ugeneunipro/clark" +DOWNLOAD="UNSUPPORTED" +MD5SUM="" +DOWNLOAD_x86_64="https://github.com/ugeneunipro/clark/archive/23801a2/clark-23801a2738b7c104c8a7a1402d50d94fb5b4036b.tar.gz" +MD5SUM_x86_64="f73ffa62e4ae6241f07d4d9fc814b455" +REQUIRES="" +MAINTAINER="Petar Petrov" +EMAIL="slackalaxy@gmail.com" diff --git a/academic/clark-ugene/slack-desc b/academic/clark-ugene/slack-desc new file mode 100644 index 0000000000..56b5c45e66 --- /dev/null +++ b/academic/clark-ugene/slack-desc @@ -0,0 +1,19 @@ +# HOW TO EDIT THIS FILE: +# The "handy ruler" below makes it easier to edit a package description. 
+# Line up the first '|' above the ':' following the base package name, and +# the '|' on the right side marks the last column you can put a character in. +# You must make exactly 11 lines for the formatting to be correct. It's also +# customary to leave one space after the ':' except on otherwise blank lines. + + |-----handy-ruler------------------------------------------------------| +clark-ugene: clark-ugene (Ugene's forked CLAssifier based on Reduced K-mers) +clark-ugene: +clark-ugene: CLARK is a novel method for supervised sequence classification based +clark-ugene: on discriminative k-mers. Somewhat unique among other metagenomic +clark-ugene: and genomic classification methods, CLARK provides a confidence +clark-ugene: score for its assignments which can be used in downstream analysis. +clark-ugene: +clark-ugene: Home: https://github.com/ugeneunipro/clark +clark-ugene: +clark-ugene: This is Ugene's (http://ugene.net/) fork of the CLARK tool +clark-ugene: (http://clark.cs.ucr.edu/) -- cgit v1.2.3