ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

4-Spark学习笔记4

2021-12-03 21:32:45  阅读:176  来源: 互联网

标签:val wordCount 笔记 学习 rdd words sc Spark Hello


  • SparkCore-WordCount
package com.lotuslaw.spark.core.wc

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable

/**
 * @author: lotuslaw
 * @version: V1.0
 * @package: com.lotuslaw.spark.core.wc
 * @create: 2021-12-02 10:08
 * @description: Word count implemented with a range of Spark RDD operators
 */
object Spark01_WordCount2 {

  /**
   * Entry point: builds a local SparkContext, runs the groupBy-based word
   * count and prints every (word, count) pair.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Create the Spark run configuration
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("WordCount")

    // Create the Spark context (the connection object)
    val sc = new SparkContext(sparkConf)

    try {
      wordCount1(sc).collect().foreach(println)
    } finally {
      // Close the connection even when the job above fails
      sc.stop()
    }
  }

  /**
   * Word count via `groupBy`: groups identical words and counts each group.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return an RDD of (word, count) pairs
   */
  def wordCount1(sc: SparkContext): RDD[(String, Int)] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val group = words.groupBy(word => word)
    group.mapValues(_.size)
  }

  /**
   * Word count via `groupByKey`: maps each word to (word, 1), groups by key
   * and counts the values of each group.
   *
   * The method name keeps its original lowercase "c" so existing callers
   * continue to resolve.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return an RDD of (word, count) pairs
   */
  def wordcount2(sc: SparkContext): RDD[(String, Int)] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val wordOne = words.map((_, 1))
    val group = wordOne.groupByKey()
    group.mapValues(_.size)
  }

  /**
   * Word count via `reduceByKey`: sums the 1s attached to each word.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return an RDD of (word, count) pairs
   */
  def wordCount3(sc: SparkContext): RDD[(String, Int)] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val wordOne = words.map((_, 1))
    wordOne.reduceByKey(_ + _)
  }

  /**
   * Word count via `aggregateByKey`, starting from 0 and using addition for
   * both functions passed to the operator.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return an RDD of (word, count) pairs
   */
  def wordCount4(sc: SparkContext): RDD[(String, Int)] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val wordOne = words.map((_, 1))
    wordOne.aggregateByKey(0)(_ + _, _ + _)
  }

  /**
   * Word count via `foldByKey`, starting from 0 and folding with addition.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return an RDD of (word, count) pairs
   */
  def wordCount5(sc: SparkContext): RDD[(String, Int)] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val wordOne = words.map((_, 1))
    wordOne.foldByKey(0)(_ + _)
  }

  /**
   * Word count via `combineByKey`: the first value of a key is used as-is,
   * and both merge functions add their two arguments.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return an RDD of (word, count) pairs
   */
  def wordCount6(sc: SparkContext): RDD[(String, Int)] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val wordOne = words.map((_, 1))
    wordOne.combineByKey(
      (v: Int) => v,
      (x: Int, y: Int) => x + y,
      (x: Int, y: Int) => x + y
    )
  }

  /**
   * Word count via `countByKey` on (word, 1) pairs.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return a map from word to its number of occurrences
   */
  def wordCount7(sc: SparkContext): collection.Map[String, Long] = {
    val rdd = sc.makeRDD(List("Hello Spark", "Hello Scala"))
    val words = rdd.flatMap(_.split(" "))
    val wordOne = words.map((_, 1))
    wordOne.countByKey()
  }

  /**
   * Word count via `countByValue` directly on the words.
   *
   * @param sc the Spark context used to build the sample RDD
   * @return a map from word to its number of occurrences
   */
  def wordCount8(sc: SparkContext): collection.Map[String, Long] = {
    val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    val words = rdd.flatMap(_.split(" "))
    words.countByValue()
  }

  /**
   * Word count via `reduce`: every word becomes a single-entry mutable map,
   * and the maps are merged pairwise by adding counts per word. The merged
   * map is printed, since the method returns `Unit`.
   *
   * Only the `reduce` variant is implemented here; the `aggregate` and
   * `fold` variants suggested by the method name are not.
   *
   * @param sc the Spark context used to build the sample RDD
   */
  def wordCount91011(sc: SparkContext): Unit = {
    val rdd = sc.makeRDD(List("Hello Scala", "Hello Spark"))
    val words = rdd.flatMap(_.split(" "))

    val mapWord = words.map(word => mutable.Map[String, Long]((word, 1)))

    // Merge map2 into map1 in place and hand map1 back as the accumulator
    val wordCount = mapWord.reduce { (map1, map2) =>
      map2.foreach { case (word, count) =>
        map1.update(word, map1.getOrElse(word, 0L) + count)
      }
      map1
    }

    // Previously the reduced map was discarded; print it so the result is visible
    println(wordCount)
  }

}

标签:val,wordCount,笔记,学习,rdd,words,sc,Spark,Hello
来源: https://www.cnblogs.com/lotuslaw/p/15640169.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有