Create scala aplication

You can do this directly with sbt, see How to create scala project with sbt, or with IntelliJ, see How to install IntelliJ Community Edition for Scala on mac,

Set up dependencies

In build.sbt add

name := "Hello Spark"

version := "0.1"

scalaVersion := "2.12.10"

libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.0.1"

Define new class

Create file src/main/scala/SimpleSpark.scala with

import org.apache.spark.sql.{SparkSession, Row}
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}
import org.apache.spark.sql.functions.{col, sum, pow, lit}

object SimpleSpark {
  def main(args: Array[String]) {

    val spark = SparkSession.builder.appName("Hello Spark").getOrCreate()

    val userData = Seq(
      Row("user1", 1),
      Row("user2", 2),
      Row("user1", 3),
      Row("user3", 4)
    )

    val userSchema = List(
      StructField("userId", StringType, true),
      StructField("spend", IntegerType, true)
    )

    val dataset = spark.createDataFrame(
      spark.sparkContext.parallelize(userData),
      StructType(userSchema)
    )

    val x = dataset.select(
        sum(pow(col("spend"), lit(2))).alias("sumSquares")
    ).collect()

    println("*********************")
    println(s"Sum of squares is ${x(0)}")
    println("*********************")

    spark.stop()
  }
}

Build package

sbt package

Run with local spark

$SPARK_HOME/bin/spark-submit \
  --class "SimpleSpark" \
  --master local \
  target/scala-2.12/hello-spark_2.12-0.1.jar

Updated: 2020-12-13