Package Details: apache-spark 2.4.0-1

Git Clone URL: https://aur.archlinux.org/apache-spark.git (read-only)
Package Base: apache-spark
Description: fast and general engine for large-scale data processing
Upstream URL: http://spark.apache.org
Licenses: Apache
Submitter: huitseeker
Maintainer: lukaszimmermann (emanuelfontelles)
Last Packager: huitseeker
Votes: 41
Popularity: 0.457314
First Submitted: 2015-10-04 09:31
Last Updated: 2019-01-30 19:46

Dependencies (9)

Required by (0)

Sources (7)

Latest Comments

1 2 3 4 5 Next › Last »

lukaszimmermann commented on 2019-07-15 09:05

Hi, sorry for being unresponsive the last couple of months. Emanuel, I added you as co-maintainer — can you perhaps update this package with the new PKGBUILD for Spark 2.4.3?

emanuelfontelles commented on 2019-07-13 17:56

Hey guys, here is a full PKGBUILD with Spark 2.4.3, with Hadoop binaries; it totally works:

# Maintainer: François Garillot ("huitseeker") <francois [at] garillot.net>
# Contributor: Christian Krause ("wookietreiber") <kizkizzbangbang@gmail.com>
# Contributor: Emanuel Fontelles ("emanuelfontelles") <emanuelfontelles@hotmail.com>

pkgname=apache-spark
pkgver=2.4.3
pkgrel=1
pkgdesc="fast and general engine for large-scale data processing"
arch=('any')
url="http://spark.apache.org"
# 'Apache' (not 'APACHE') is the identifier used by Arch's licenses package
# (/usr/share/licenses/common/Apache); makepkg license lookups are case-sensitive.
license=('Apache')
# Spark 2.x needs a JDK in the 6..8 range; both constraints apply together.
depends=('java-environment>=6' 'java-environment<9')
optdepends=('python2: python2 support for pyspark'
            'ipython2: ipython2 support for pyspark'
            'python: python3 support for pyspark'
            'ipython: ipython3 support for pyspark'
            'r: support for sparkR'
            'rsync: support rsync hadoop binaries from master'
            'hadoop: support for running on YARN')

install=apache-spark.install
# Pre-built binary distribution bundled with Hadoop 2.7, plus local unit files
# and launcher scripts shipped alongside this PKGBUILD.
source=("https://archive.apache.org/dist/spark/spark-${pkgver}/spark-${pkgver}-bin-hadoop2.7.tgz"
        'apache-spark-master.service'
        'apache-spark-slave@.service'
        'spark-env.sh'
        'spark-daemon-run.sh'
        'run-master.sh'
        'run-slave.sh')
sha1sums=('7b2f1be5c4ccec86c6d2b1e54c379b7af7a5752a'
          'ac71d12070a9a10323e8ec5aed4346b1dd7f21c6'
          'a191e4f8f7f8bbc596f4fadfb3c592c3efbc4fc0'
          '3fa39d55075d4728bd447692d648053c9f6b07ec'
          '08557d2d5328d5c99e533e16366fd893fffaad78'
          '323445b8d64aea0534a2213d2600d438f406855b'
          '65b1bc5fce63d1fa7a1b90f2d54a09acf62012a4')
# User-editable daemon environment file; preserved across upgrades.
backup=('etc/apache-spark/spark-env.sh')

# Allow callers to override the package extension; default to .pkg.tar.xz.
PKGEXT=${PKGEXT:-'.pkg.tar.xz'}

# prepare() — no patching is required for the binary distribution; this only
# sanity-checks that the extracted source tree is reachable so a bad extraction
# fails here rather than midway through package().
prepare() {
  cd "$srcdir/spark-${pkgver}-bin-hadoop2.7" || return 1
}

# package() — stage the binary Spark distribution under /opt/apache-spark,
# expose its launchers in /usr/bin, and wire config into /etc/apache-spark.
package() {
  cd "$srcdir/spark-${pkgver}-bin-hadoop2.7"

  install -d "$pkgdir/usr/bin" "$pkgdir/opt" "$pkgdir/var/log/apache-spark" "$pkgdir/var/lib/apache-spark/work"
  # setgid dirs so files created by the daemons inherit the group.
  chmod 2775 "$pkgdir/var/log/apache-spark" "$pkgdir/var/lib/apache-spark/work"

  cp -r "$srcdir/spark-${pkgver}-bin-hadoop2.7" "$pkgdir/opt/apache-spark/"

  cd "$pkgdir/usr/bin"
  local binary binpath
  for binary in beeline pyspark sparkR spark-class spark-shell find-spark-home spark-sql spark-submit load-spark-env.sh; do
    binpath="/opt/apache-spark/bin/$binary"
    ln -s "$binpath" "$binary"
    # Pin SPARK_HOME to the install prefix instead of the build directory.
    sed -i 's|^export SPARK_HOME=.*$|export SPARK_HOME=/opt/apache-spark|' "$pkgdir/$binpath"
    # Resolve $0 through the /usr/bin symlink so dirname finds the real script.
    sed -i -Ee 's/\$\(dirname "\$0"\)/$(dirname "$(readlink -f "$0")")/g' "$pkgdir/$binpath"
  done

  # Export SPARK_HOME for all login shells.
  install -d "$pkgdir/etc/profile.d"
  {
    echo '#!/bin/sh'
    echo 'SPARK_HOME=/opt/apache-spark'
    echo 'export SPARK_HOME'
  } > "$pkgdir/etc/profile.d/apache-spark.sh"
  chmod 755 "$pkgdir/etc/profile.d/apache-spark.sh"

  install -Dm644 "$srcdir/apache-spark-master.service" "$pkgdir/usr/lib/systemd/system/apache-spark-master.service"
  install -Dm644 "$srcdir/apache-spark-slave@.service" "$pkgdir/usr/lib/systemd/system/apache-spark-slave@.service"
  install -Dm644 "$srcdir/spark-env.sh" "$pkgdir/etc/apache-spark/spark-env.sh"
  local script
  for script in run-master.sh run-slave.sh spark-daemon-run.sh; do
    install -Dm755 "$srcdir/$script" "$pkgdir/opt/apache-spark/sbin/$script"
  done
  install -Dm644 "$srcdir/spark-${pkgver}-bin-hadoop2.7/conf"/* "$pkgdir/etc/apache-spark"

  # Keep upstream templates, and point the live conf/ and work/ dirs at
  # /etc and /var so they survive package upgrades.
  cd "$pkgdir/opt/apache-spark"
  mv conf conf-templates
  ln -sf "/etc/apache-spark" conf
  ln -sf "/var/lib/apache-spark/work" .
}

marcinn commented on 2019-06-14 20:46

PKGBUILD patch for 2.4.3:

diff --git a/PKGBUILD b/PKGBUILD
index 3540d3e..e31fec9 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -2,7 +2,7 @@
 # Contributor: Christian Krause ("wookietreiber") <kizkizzbangbang@gmail.com>

 pkgname=apache-spark
-pkgver=2.4.0
+pkgver=2.4.3
 pkgrel=1
 pkgdesc="fast and general engine for large-scale data processing"
 arch=('any')
@@ -17,14 +17,14 @@ optdepends=('python2: python2 support for pyspark'
             'rsync: support rsync hadoop binaries from master'
             'hadoop: support for running on YARN')
 install=apache-spark.install
-source=("https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/spark-${pkgver}/spark-${pkgver}-bin-without-hadoop.tgz"
+source=("https://archive.apache.org/dist/spark/spark-${pkgver}/spark-${pkgver}-bin-without-hadoop.tgz"
         'apache-spark-master.service'
         'apache-spark-slave@.service'
         'spark-env.sh'
         'spark-daemon-run.sh'
         'run-master.sh'
         'run-slave.sh')
-sha1sums=('ce6fe98272b78a5c487d16f0f5f828908b65d7fe'
+sha1sums=('54bf6a19eb832dc0cf2d7a7465b785390d00122b'
           'ac71d12070a9a10323e8ec5aed4346b1dd7f21c6'
           'a191e4f8f7f8bbc596f4fadfb3c592c3efbc4fc0'
           '3fa39d55075d4728bd447692d648053c9f6b07ec'

sabitmaulanaa commented on 2019-05-27 14:23

Will this package be updated?

lukaszimmermann commented on 2019-04-26 09:29

Works, thanks!

blurbdust commented on 2019-04-26 00:31

Here is my current PKGBUILD that works for the latest spark as on 4/23/19

https://gist.github.com/blurbdust/3e03531d890c8bcf1da3c3d1192ce4d5

lukaszimmermann commented on 2019-04-25 09:14

I would be happy to become maintainer of this package, then I can take care of some open issues here.

apetresc commented on 2019-01-30 02:44

The package is so out of date that the mirror no longer carries the source package corresponding to 2.3.1, so it fails to build now.

If 2.4.0 still requires additional testing that you don't have time to do yet, then please at least bump to version 2.3.2 which is a drop-in replacement. The patch is trivial.

MangoMan commented on 2019-01-14 09:31

I concur with @wenbushi that hadoop should be dependency since installing hadoop resolved the previous JNI errors.

wenbushi commented on 2018-12-02 01:55

I've also got the missing org/slf4j/Logger error even after installing the slf4j AUR package manually. Maybe it should be specified that hadoop is a required dependency.