• Spark 之 expression


    ##.

    /**
     * Expression that subtracts `startDate` from `endDate` and yields the difference as an
     * integer number of days.
     *
     * Both children are declared as `DateType` inputs and the result type is `IntegerType`.
     * `endDate` is the left child and `startDate` is the right child.
     */
    @ExpressionDescription(
      usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.",
      examples = """
        Examples:
          > SELECT _FUNC_('2009-07-31', '2009-07-30');
           1
    
          > SELECT _FUNC_('2009-07-30', '2009-07-31');
           -1
      """,
      group = "datetime_funcs",
      since = "1.5.0")
    case class DateDiff(endDate: Expression, startDate: Expression)
      extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
    
      override def dataType: DataType = IntegerType
      override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)
    
      // Child order matters: the end date comes first, so the result is `left - right`.
      override def left: Expression = endDate
      override def right: Expression = startDate
    
      /** Interpreted path: both inputs arrive as `Int` values and are subtracted directly. */
      override def nullSafeEval(end: Any, start: Any): Any = {
        val endValue = end.asInstanceOf[Int]
        val startValue = start.asInstanceOf[Int]
        endValue - startValue
      }
    
      /** Codegen path: emits the same subtraction over the two generated child values. */
      override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
        defineCodeGen(ctx, ev, (endCode, startCode) => s"$endCode - $startCode")
      }
    
      override protected def withNewChildrenInternal(
          newLeft: Expression, newRight: Expression): DateDiff = {
        copy(endDate = newLeft, startDate = newRight)
      }
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    支持默认参数的表达式（可省略 sourceTz，通过辅助构造函数补上默认值）
    // scalastyle:off line.size.limit
    /**
     * Expression that converts a timestamp without time zone from one time zone to another.
     *
     * The children are, in order, the source time zone, the target time zone and the timestamp.
     * The two-argument constructor fills in `CurrentTimeZone()` as the source time zone.
     */
    @ExpressionDescription(
      usage = "_FUNC_([sourceTz, ]targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ",
      arguments = """
        Arguments:
          * sourceTz - the time zone for the input timestamp.
                       If it is missed, the current session time zone is used as the source time zone.
          * targetTz - the time zone to which the input timestamp should be converted
          * sourceTs - a timestamp without time zone
      """,
      examples = """
        Examples:
          > SELECT _FUNC_('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');
           2021-12-05 15:00:00
          > SELECT _FUNC_('Europe/Brussels', timestamp_ntz'2021-12-05 15:00:00');
           2021-12-06 00:00:00
      """,
      group = "datetime_funcs",
      since = "3.4.0")
    // scalastyle:on line.size.limit
    case class ConvertTimezone(
        sourceTz: Expression,
        targetTz: Expression,
        sourceTs: Expression)
      extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant {
    
      /** Two-argument form: the source time zone defaults to `CurrentTimeZone()`. */
      def this(targetTz: Expression, sourceTs: Expression) = {
        this(CurrentTimeZone(), targetTz, sourceTs)
      }
    
      override def prettyName: String = "convert_timezone"
    
      override def dataType: DataType = TimestampNTZType
      override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, TimestampNTZType)
    
      override def first: Expression = sourceTz
      override def second: Expression = targetTz
      override def third: Expression = sourceTs
    
      /** Interpreted path: unwraps the three values and delegates to `DateTimeUtils`. */
      override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = {
        val sourceZoneName = srcTz.asInstanceOf[UTF8String].toString
        val targetZoneName = tgtTz.asInstanceOf[UTF8String].toString
        val timestampValue = micros.asInstanceOf[Long]
        DateTimeUtils.convertTimestampNtzToAnotherTz(sourceZoneName, targetZoneName, timestampValue)
      }
    
      /** Codegen path: emits a call to the same `DateTimeUtils` helper used by the interpreter. */
      override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
        // Strip the trailing "$" of the Scala object class name so the generated code can
        // reference the helper by its plain class name.
        val utilsClassName = DateTimeUtils.getClass.getName.stripSuffix("$")
        defineCodeGen(ctx, ev, (srcCode, tgtCode, tsCode) =>
          s"$utilsClassName.convertTimestampNtzToAnotherTz(" +
            s"$srcCode.toString(), $tgtCode.toString(), $tsCode)")
      }
    
      override protected def withNewChildrenInternal(
          newFirst: Expression,
          newSecond: Expression,
          newThird: Expression): ConvertTimezone =
        copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    时间函数
      /**
       * Computes the time span between two timestamps, expressed in the requested unit.
       *
       * The unit name is upper-cased with `Locale.ROOT` before it is looked up, so the lookup is
       * case insensitive. Both timestamps are converted with `getLocalDateTime` in `zoneId`
       * before the difference is taken.
       *
       * @param unit The interval unit in which the difference is expressed.
       * @param startTs The timestamp that is subtracted from `endTs`.
       * @param endTs The timestamp from which `startTs` is subtracted.
       * @param zoneId The time zone ID in which both timestamps are interpreted.
       * @return The span between the two timestamps in the given unit.
       * @throws IllegalStateException if `unit` is not a key of `timestampDiffMap`.
       */
      def timestampDiff(unit: String, startTs: Long, endTs: Long, zoneId: ZoneId): Long = {
        val unitKey = unit.toUpperCase(Locale.ROOT)
        // Reject unknown units up front; the message reports the unit exactly as the caller gave it.
        if (!timestampDiffMap.contains(unitKey)) {
          throw new IllegalStateException(s"Got the unexpected unit '$unit'.")
        }
        val localStart = getLocalDateTime(startTs, zoneId)
        val localEnd = getLocalDateTime(endTs, zoneId)
        val diffFunc = timestampDiffMap(unitKey)
        diffFunc(localStart, localEnd)
      }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    TimestampDiff

    旧版表达式写法

    // scalastyle:off line.size.limit
    /**
     * Expression returning the difference between two timestamps in a given unit, where the unit
     * is itself a child expression evaluated to a string.
     *
     * The children are, in order, the unit, the start timestamp and the end timestamp. The result
     * type is `LongType`. The time zone used for evaluation is resolved from `endTimestamp`'s
     * data type through `zoneIdForType`.
     */
    @ExpressionDescription(
      usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
      arguments = """
        Arguments:
          * unit - this indicates the units of the difference between the given timestamps.
            Supported string values of `unit` are (case insensitive):
              - "YEAR"
              - "QUARTER" - 3 months
              - "MONTH"
              - "WEEK" - 7 days
              - "DAY"
              - "HOUR"
              - "MINUTE"
              - "SECOND"
              - "MILLISECOND"
              - "MICROSECOND"
          * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
          * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
      """,
      examples = """
        Examples:
          > SELECT _FUNC_('HOUR', timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
           8
          > SELECT _FUNC_('MONTH', timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
           1
          > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
           -10
          > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
           10
      """,
      group = "datetime_funcs",
      since = "3.3.0")
    // scalastyle:on line.size.limit
    case class TimestampDiff(
        unit: Expression,
        startTimestamp: Expression,
        endTimestamp: Expression,
        timeZoneId: Option[String] = None)
      extends TernaryExpression
      with ImplicitCastInputTypes
      with NullIntolerant
      with TimeZoneAwareExpression {
    
      /**
       * Three-argument form without a time zone.
       *
       * The parameters map positionally: `quantity` becomes `startTimestamp` and `timestamp`
       * becomes `endTimestamp`.
       */
      def this(unit: Expression, quantity: Expression, timestamp: Expression) = {
        this(unit, quantity, timestamp, None)
      }
    
      override def prettyName: String = "timestampdiff"
    
      override def dataType: DataType = LongType
      override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType, TimestampType)
    
      override def first: Expression = unit
      override def second: Expression = startTimestamp
      override def third: Expression = endTimestamp
    
      // Zone used by both the interpreted and the generated code path; computed lazily on first use.
      @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)
    
      override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = {
        copy(timeZoneId = Option(timeZoneId))
      }
    
      /** Interpreted path: unwraps the unit and both values and delegates to `DateTimeUtils`. */
      override def nullSafeEval(u: Any, startMicros: Any, endMicros: Any): Any = {
        val unitName = u.asInstanceOf[UTF8String].toString
        val startValue = startMicros.asInstanceOf[Long]
        val endValue = endMicros.asInstanceOf[Long]
        DateTimeUtils.timestampDiff(unitName, startValue, endValue, zoneIdInEval)
      }
    
      /** Codegen path: emits a call to `DateTimeUtils.timestampDiff` with the zone as a reference. */
      override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
        // Strip the trailing "$" of the Scala object class name for use in generated code.
        val utilsClassName = DateTimeUtils.getClass.getName.stripSuffix("$")
        val zoneRef = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
        defineCodeGen(ctx, ev, (unitCode, startCode, endCode) =>
          s"$utilsClassName.timestampDiff($unitCode.toString(), $startCode, $endCode, $zoneRef)")
      }
    
      override protected def withNewChildrenInternal(
          newFirst: Expression,
          newSecond: Expression,
          newThird: Expression): TimestampDiff =
        copy(unit = newFirst, startTimestamp = newSecond, endTimestamp = newThird)
    }
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84

    新版写法：unit 以 String 形式传参（不再作为子表达式）

    // scalastyle:off line.size.limit
    /**
     * Expression returning the difference between two timestamps in a given unit, where the unit
     * is a plain `String` field rather than a child expression.
     *
     * Only the two timestamps are children (`left` is the start, `right` is the end). The result
     * type is `LongType`. The time zone used for evaluation is resolved from `endTimestamp`'s
     * data type through `zoneIdForType`.
     */
    @ExpressionDescription(
      usage = "_FUNC_(unit, startTimestamp, endTimestamp) - Gets the difference between the timestamps `endTimestamp` and `startTimestamp` in the specified units by truncating the fraction part.",
      arguments = """
        Arguments:
          * unit - this indicates the units of the difference between the given timestamps.
            Supported string values of `unit` are (case insensitive):
              - "YEAR"
              - "QUARTER" - 3 months
              - "MONTH"
              - "WEEK" - 7 days
              - "DAY"
              - "HOUR"
              - "MINUTE"
              - "SECOND"
              - "MILLISECOND"
              - "MICROSECOND"
          * startTimestamp - A timestamp which the expression subtracts from `endTimestamp`.
          * endTimestamp - A timestamp from which the expression subtracts `startTimestamp`.
      """,
      examples = """
        Examples:
          > SELECT _FUNC_(HOUR, timestamp_ntz'2022-02-11 20:30:00', timestamp_ntz'2022-02-12 04:30:00');
           8
          > SELECT _FUNC_(MONTH, timestamp_ltz'2022-01-01 00:00:00', timestamp_ltz'2022-02-28 00:00:00');
           1
          > SELECT _FUNC_(SECOND, date'2022-01-01', timestamp'2021-12-31 23:59:50');
           -10
          > SELECT _FUNC_(YEAR, timestamp'2000-01-01 01:02:03.123456', timestamp'2010-01-01 01:02:03.123456');
           10
      """,
      group = "datetime_funcs",
      since = "3.3.0")
    // scalastyle:on line.size.limit
    case class TimestampDiff(
        unit: String,
        startTimestamp: Expression,
        endTimestamp: Expression,
        timeZoneId: Option[String] = None)
      extends BinaryExpression
      with ImplicitCastInputTypes
      with NullIntolerant
      with TimeZoneAwareExpression {
    
      /**
       * Three-argument form without a time zone.
       *
       * The parameters map positionally: `quantity` becomes `startTimestamp` and `timestamp`
       * becomes `endTimestamp`.
       */
      def this(unit: String, quantity: Expression, timestamp: Expression) = {
        this(unit, quantity, timestamp, None)
      }
    
      override def prettyName: String = "timestampdiff"
    
      override def dataType: DataType = LongType
      override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, TimestampType)
    
      override def left: Expression = startTimestamp
      override def right: Expression = endTimestamp
    
      // Zone used by both the interpreted and the generated code path; computed lazily on first use.
      @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(endTimestamp.dataType)
    
      override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = {
        copy(timeZoneId = Option(timeZoneId))
      }
    
      /** Interpreted path: passes the unit field and both values to `DateTimeUtils`. */
      override def nullSafeEval(startMicros: Any, endMicros: Any): Any = {
        val startValue = startMicros.asInstanceOf[Long]
        val endValue = endMicros.asInstanceOf[Long]
        DateTimeUtils.timestampDiff(unit, startValue, endValue, zoneIdInEval)
      }
    
      /** Codegen path: emits a call to `DateTimeUtils.timestampDiff` with the unit as a literal. */
      override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
        // Strip the trailing "$" of the Scala object class name for use in generated code.
        val utilsClassName = DateTimeUtils.getClass.getName.stripSuffix("$")
        val zoneRef = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
        // NOTE(review): `unit` is spliced unescaped into a Java string literal; this assumes it
        // never contains a quote or backslash - confirm that callers guarantee this.
        defineCodeGen(ctx, ev, (startCode, endCode) =>
          s"""$utilsClassName.timestampDiff("$unit", $startCode, $endCode, $zoneRef)""")
      }
    
      /** Renders the SQL form with the bare unit first, followed by the children's SQL. */
      override def sql: String = {
        val argumentSqls = unit +: children.map(_.sql)
        val joinedArguments = argumentSqls.mkString(", ")
        s"$prettyName($joinedArguments)"
      }
    
      override protected def withNewChildrenInternal(
          newLeft: Expression,
          newRight: Expression): TimestampDiff =
        copy(startTimestamp = newLeft, endTimestamp = newRight)
    }
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87

    lateral view

    https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-lateral-view.html

  • 相关阅读:
    带有酒店评论的情绪分析 - 处理数据
    基于大模型GPT,如何提炼出优质的Prompt
    Redisson 的主要方法
    运算符重载
    一个QT程序无法启动问题的分析与解决
    关于Django
    Java代码审计-SQL注入
    大数据-玩转数据-Python几种数据采集
    python机器人编程——用python实现一个写字机器人
    rsyslog实现将日志存储到mysql中
  • 原文地址:https://blog.csdn.net/zhixingheyi_tian/article/details/133654442