Spark Configuration

Every Spark job needs a configured SparkContext, and SQL jobs additionally need a SQLContext (or HiveContext). The intent of this helper object is to simplify that setup by applying sensible defaults for the application name and master URL whenever the job submission does not provide them.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext

object SparkConfigUtils {
  // args are currently unused; the parameter is kept so jobs can pass their
  // command-line arguments through a single entry point.
  def sparkContext(args: Array[String]): SparkContext = {
    val conf = new SparkConf()
    // Fall back to a default application name unless one was supplied
    // (e.g. via spark-submit --name or --conf spark.app.name=...).
    if (conf.getOption("spark.app.name").isEmpty) {
      conf.setAppName("GitHub push counter")
    } else {
      println("APP NAME PROVIDED: " + conf.get("spark.app.name"))
    }
    // Fall back to local mode unless a master URL was supplied.
    if (conf.getOption("spark.master").isEmpty) {
      conf.setMaster("local[*]")
    } else {
      println("MASTER PROVIDED: " + conf.get("spark.master"))
    }
    //conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("spark.io.compression.codec", "lz4")
    new SparkContext(conf)
  }

  def sparkSQLContext(sparkContext: SparkContext): SQLContext = new SQLContext(sparkContext)

  def sparkHiveSQLContext(sparkContext: SparkContext): HiveContext = new HiveContext(sparkContext)
}
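
For context, a minimal driver built on this helper might look like the sketch below. The GitHubPushCounter object name and the elided job logic are hypothetical, standing in for an actual job.

import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

// Hypothetical job skeleton showing how SparkConfigUtils would be called.
object GitHubPushCounter {
  def main(args: Array[String]): Unit = {
    // Build a SparkContext with the defaults applied by the helper.
    val sc: SparkContext = SparkConfigUtils.sparkContext(args)
    // Derive a SQLContext from the same SparkContext when DataFrames are needed.
    val sqlContext: SQLContext = SparkConfigUtils.sparkSQLContext(sc)

    // ... job logic goes here ...

    sc.stop()
  }
}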
