用StructType模拟字典
import os
import string

import numpy as np
import pandas as pd
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.types import *
def get_dict(data):
    """Build the dict-shaped record returned by the ``func1`` UDF.

    Args:
        data: Integer taken from the ``line`` column; may be ``None`` for a
            null row.

    Returns:
        A dict with keys ``'num'`` (the input, unchanged) and ``'letter'``
        (``string.ascii_letters[data]``). ``'letter'`` is ``None`` when
        ``data`` is ``None`` or is not a valid index in
        ``0 .. len(string.ascii_letters) - 1``.
    """
    # Guard the lookup: an exception raised inside a UDF aborts the whole
    # Spark job, and a negative index would silently wrap around to the end
    # of the alphabet instead of signalling "no letter".
    has_letter = data is not None and 0 <= data < len(string.ascii_letters)
    letter = string.ascii_letters[data] if has_letter else None
    return {'num': data, 'letter': letter}


def main():
    """Run the demo: use a StructType-returning UDF to emulate a dict column."""
    # NOTE(review): machine-specific Spark install path; adjust for your host.
    os.environ['SPARK_HOME'] = "/Users/jingjing.ma/Documents/spark-3.3.1-bin-hadoop3"
    spark = (
        SparkSession.builder
        .master('local[*]')
        .appName('try_test')
        .getOrCreate()
    )
    try:
        sc = spark.sparkContext

        # Create an RDD and convert it to a single-column DataFrame.
        rdd = sc.parallelize([[1], [2], [3]])
        df = rdd.toDF(['line'])

        # Register the UDF. The StructType return schema mirrors the keys of
        # the dict produced by get_dict, so each result shows up as a struct.
        result_schema = (
            StructType()
            .add("num", IntegerType(), nullable=True)
            .add("letter", StringType())
        )
        func2 = spark.udf.register('func1', get_dict, result_schema)

        df.select(func2(df['line'])).show()  # DataFrame API style
        df.selectExpr("func1(line)").show(truncate=False)  # SQL style
    finally:
        # Always release the local Spark resources, even if a step above fails.
        spark.stop()


if __name__ == '__main__':
    main()
+-----------+
|func1(line)|
+-----------+
|     {1, b}|
|     {2, c}|
|     {3, d}|
+-----------+

+-----------+
|func1(line)|
+-----------+
|{1, b}     |
|{2, c}     |
|{3, d}     |
+-----------+
Process finished with exit code 0
下一篇:【git 介绍】AhuntSun