test_df.printSchema()  # print the schema of test_df (output follows)
root
|-- features: vector (nullable = true)
|-- output: double (nullable = true)
|-- prediction: double (nullable = false)
- vector_udf = F.udf(lambda vector: vector.toArray().tolist(), ArrayType(FloatType()))  # UDF: turn a vector into a plain list; declared return type is array<float>. NOTE(review): FloatType may narrow the vector's values - confirm the precision loss is acceptable
- flattened_df = test_df.withColumn('col1', vector_udf('features'))  # add column 'col1' by applying the UDF to the 'features' column
-
- flattened_df.printSchema()  # print the schema, which now includes 'col1'
root
|-- features: vector (nullable = true)
|-- output: double (nullable = true)
|-- prediction: double (nullable = false)
|-- col1: array (nullable = true)
| |-- element: float (containsNull = true)
- list4tojson = ['col1']  # names of the columns that are passed through F.to_json below
- # Save the file.
- flattened_df.select('output', *[F.to_json(x) for x in list4tojson]).coalesce(1).write.csv('test_001.csv')  # select 'output' plus the JSON-encoded columns, coalesce to one partition, write as CSV (presumably JSON-encoded because CSV cannot hold array columns - verify)
-
- save_df=flattened_df.select('output',"prediction", *[F.to_json(x) for x in list4tojson])  # same selection with 'prediction' added, kept as a DataFrame instead of being written
-
- save_df.printSchema()  # print the schema of save_df; the JSON column is named to_json(col1)
root
|-- output: double (nullable = true)
|-- prediction: double (nullable = false)
|-- to_json(col1): string (nullable = true)