用StructType模拟字典
# Demo: emulate a dict-valued UDF result in PySpark by declaring a StructType
# return schema whose field names match the keys of the returned dict.
import os
import string

import numpy as np
import pandas as pd

# Local Spark installation that the demo points SPARK_HOME at.
SPARK_HOME = "/Users/jingjing.ma/Documents/spark-3.3.1-bin-hadoop3"


def get_dict(data):
    """Build the dict payload returned by the UDF for one input value.

    Args:
        data: Value of the ``line`` column. Expected to be an integer index
            into ``string.ascii_letters`` (0-51); may be ``None`` for a null
            row.

    Returns:
        A dict with the keys ``num`` and ``letter``. ``num`` is ``data``
        unchanged. ``letter`` is ``string.ascii_letters[data]`` when ``data``
        is an int in ``0..51``; otherwise it is ``None`` instead of raising
        ``IndexError``/``TypeError`` or silently wrapping a negative index.
    """
    letter = None
    if isinstance(data, int) and 0 <= data < len(string.ascii_letters):
        letter = string.ascii_letters[data]
    return {'num': data, 'letter': letter}


def main():
    """Run the demo: build a three-row DataFrame and apply the struct UDF.

    The UDF is applied twice, once through the DataFrame API and once through
    a SQL expression, and both results are printed with ``show()``.
    """
    # PySpark is imported here rather than at module level so that get_dict
    # can be imported and tested on a machine without a Spark installation.
    import pyspark.sql.functions as F  # noqa: F401  (kept from the original listing, unused)
    from pyspark.sql import SparkSession
    from pyspark.sql.types import IntegerType, StringType, StructType

    os.environ['SPARK_HOME'] = SPARK_HOME
    spark = (
        SparkSession.builder
        .master('local[*]')
        .appName('try_test')
        .getOrCreate()
    )
    try:
        sc = spark.sparkContext

        # Create an RDD and convert it to a single-column DataFrame.
        rdd = sc.parallelize([[1], [2], [3]])
        df = rdd.toDF(['line'])

        # Register the UDF. The struct fields mirror the keys of the dict
        # returned by get_dict.
        result_schema = (
            StructType()
            .add("num", IntegerType(), nullable=True)
            .add("letter", StringType())
        )
        func2 = spark.udf.register('func1', get_dict, result_schema)

        df.select(func2(df['line'])).show()  # DataFrame API style
        df.selectExpr("func1(line)").show(truncate=False)  # SQL style
    finally:
        # Release the local Spark session even if the demo fails part-way.
        spark.stop()


if __name__ == '__main__':
    main()
+-----------+
|func1(line)|
+-----------+
|     {1, b}|
|     {2, c}|
|     {3, d}|
+-----------+

+-----------+
|func1(line)|
+-----------+
|{1, b}     |
|{2, c}     |
|{3, d}     |
+-----------+

Process finished with exit code 0