#Function
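The regexp_replace function from the pyspark.sql.functions module performs regular-expression-based replacements on string columns in a DataFrame: every substring of the column value that matches the pattern is replaced with the replacement string.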
Syntax:
from pyspark.sql.functions import regexp_replace
regexp_replace(column, pattern, replacement)
Example:
from pyspark.sql import SparkSession
from pyspark.sql.functions import regexp_replace
# Obtain a Spark session named for this example
spark = (
    SparkSession.builder
    .appName("RegexpReplaceExample")
    .getOrCreate()
)

# Single-column sample rows: letters followed by digits
data = [("hello123",), ("world456",), ("foo789",)]
column_names = ["text"]
raw_df = spark.createDataFrame(data, column_names)

# Derive "cleaned_text" from "text" by replacing each digit (\d) with ""
digit_pattern = r"\d"
without_digits = regexp_replace("text", digit_pattern, "")
df = raw_df.withColumn("cleaned_text", without_digits)

# Print the original and cleaned columns side by side
df.show()
+--------+------------+
|    text|cleaned_text|
+--------+------------+
|hello123|       hello|
|world456|       world|
|  foo789|         foo|
+--------+------------+