# Sample rows as (id, phone_number) tuples. Only the last entry ends right
# after the digits; the other three carry extra trailing characters.
data = [
    (1, "+1-202-5550137x123"),
    (2, "+44-20-79461234#ext"),
    (3, "+91-80-26541234abc"),
    (4, "+1-408-1234567"),
]

# Column names, in the same order as the fields of each tuple above.
schema = ["id", "phone_number"]

# Build the DataFrame from the sample rows and print it.
# NOTE(review): assumes `spark` is an existing SparkSession created elsewhere;
# it is not defined in this snippet.
df = spark.createDataFrame(data, schema)
df.show()
# Pattern for a bare phone number of the form
#   +<1-3 digits>-<2-4 digits>-<6-10 digits>
# `^` and `$` anchor the match to the whole value, so a number followed by
# extra characters (e.g. "x123", "#ext", "abc") is rejected rather than being
# accepted on the strength of its valid prefix.
# A raw string is used so the backslashes reach the regex engine unchanged.
regex_pattern = r'^\+\d{1,3}-\d{2,4}-\d{6,10}$'
```text
+---+-------------------+
| id|       phone_number|
+---+-------------------+
|  1| +1-202-5550137x123|
|  2|+44-20-79461234#ext|
|  3| +91-80-26541234abc|
|  4|     +1-408-1234567|
+---+-------------------+

+---+--------------+
| id|  phone_number|
+---+--------------+
|  4|+1-408-1234567|
+---+--------------+
```
# Keep only the rows whose phone_number matches regex_pattern and print them.
matches_pattern = col("phone_number").rlike(regex_pattern)
df.select("*").filter(matches_pattern).show()
{{CODE_BLOCK_1}}