• Flink学习15:Flink自定义数据源


     

     

    自定义数据源,核心3步:

    1.创建一个类,用来指定数据流中的数据类型

    2.创建一个数据源的类,继承RichSourceFunction等类,并重写run 和cancel 方法

    3.在main方法中,生成环境后,把自定义的数据源的类,通过addSource 加入到环境中

    import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
    import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
    
    import org.apache.flink.api.scala._
    import java.util.Calendar
    import scala.util.Random
    
    
    //defined the stockPrice attribute
    case class StockPrice(stockID:String, timestamp:Long, price:Double)
    
    //to define myself dataSource
    class StockPriceSource extends RichSourceFunction[StockPrice]{
    
      var isRunning: Boolean =true
      val rand = new Random()
    
      //initialize the stock price
      private var priceList = List(10.0d, 20.0d, 30.0d, 40.0d, 50.0d)
      var stockId =0
      var curPrice =0.0d
    
    
      //when we defined our dataSource function, we must override Two function :run and cancel
      override def run(sourceContext: SourceFunction.SourceContext[StockPrice]): Unit = {
    
        while (isRunning){
          //change the stock price random
    
          //get the stockid by random
          stockId=rand.nextInt(priceList.size)
    
          //generate the random price
          val curPrice = priceList(stockId) + rand.nextGaussian() * 0.05
    
          //update the stock price list
          priceList = priceList.updated(stockId,curPrice)
    
          //create the time stamp
          val curTime = Calendar.getInstance.getTimeInMillis
    
          //add my data source to sourceContext
          sourceContext.collect(StockPrice("stock_"+stockId.toString, curTime, curPrice))
    
          //thread sleep
          Thread.sleep(rand.nextInt(10))
    
        }
      }
    
      override def cancel(): Unit = {
        //cancel the run function
        isRunning=false
      }
    }
    
    
    
    object myDataSourceTest {
    
    
      def main(args: Array[String]): Unit = {
    
        //create env
        val env = StreamExecutionEnvironment.getExecutionEnvironment
    
        //set the parallelism
        env.setParallelism(1)
    
        //create my dataSource
        val stockPriceStream: DataStream[StockPrice] = env.addSource(new StockPriceSource)
    
        //print
        stockPriceStream.print()
    
        //execute
        env.execute("stock price streaming")
    
      }
    
    
    }
    
  • 相关阅读:
    万物皆可Cassandra:HUAWEI Tag背后的神仙数据库
    JCIM2021 | MolGPT : 基于Transformer-Decoder的分子生成
    work环境配置
    linux上mysql 8.0安装
    如何控制MySQL事务提交后,刷redo-log的策略?
    【新刊邀稿】1区计算机类SCI&EI,迅速抢占版面,就差你了~
    深入理解Golang之Map
    webpack5零基础入门-5使用webpack处理stylus文件
    aspose-words去水印自用资源
    【软件测试】02 -- 软件缺陷管理
  • 原文地址:https://blog.csdn.net/hzp666/article/details/126247313