# UDF (user-defined function) in PySpark
#python #pysparktutorial #pyspark #data #data engineering
# Column names for the sample DataFrame, in the same order as each row tuple.
columns = ["id", "name", "age"]

# Sample rows: (id, name, age).
data = [
    (1, "John", 25),
    (2, "neeta", 30),
    (3, "ashish", 35),
    (4, "ram", 55),
]

# Build the DataFrame from the in-memory rows and print it.
df = spark.createDataFrame(data, columns)
df.show()
def age_cat(age):
    """Classify an age into a coarse category label.

    Args:
        age: Numeric age, or ``None`` for a missing value.

    Returns:
        ``"Young"`` when ``age <= 30``, ``"Adult"`` when ``30 < age < 40``,
        ``"Senior"`` otherwise (40 and above), or ``None`` when ``age`` is
        ``None``.
    """
    # Spark hands SQL NULL to a Python UDF as None; comparing None with a
    # number raises TypeError, so propagate the missing value instead.
    if age is None:
        return None
    if age <= 30:
        return "Young"
    # NOTE(review): the comparison operators were missing from the original
    # text; the upper bound is reconstructed as exclusive (< 40) -- confirm.
    if age < 40:
        return "Adult"
    return "Senior"
# Imports come first: `udf`, `StringType` and `col` must be in scope before
# the UDF is created and applied below.
# NOTE: the wildcard imports are kept because other parts of the file may
# rely on them; `pyspark.sql.functions` exports names such as `sum`, `min`
# and `max`, which shadow the Python builtins after this point.
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Wrap the plain Python function as a Spark UDF that returns a string.
age_bar = udf(age_cat, StringType())

# Add the category column derived from the existing "age" column and print it.
df1 = df.withColumn("Age_Indication", age_bar(col("age")))
df1.show()