REGEXP

# Column names for the sample DataFrame.
schema = ["id", "phone_number"]

# Sample phone numbers: the first three carry trailing characters after the
# digits ("x123", "#ext", "abc"); the last one is digits and dashes only.
phone_numbers = [
    "+1-202-5550137x123",
    "+44-20-79461234#ext",
    "+91-80-26541234abc",
    "+1-408-1234567",
]

# Pair each number with a 1-based id, giving (id, phone_number) tuples.
data = list(enumerate(phone_numbers, start=1))

# Build the DataFrame from the in-memory rows and print it.
df = spark.createDataFrame(data, schema=schema)
df.show()

# Pattern anchored at both ends: "+<1-3 digits>-<2-4 digits>-<6-10 digits>".
# The trailing `$` is what rejects values with extra characters after the
# digits (e.g. "x123", "#ext"); without it a find-style match would accept
# them on their valid prefix. A raw string keeps the backslashes literal.
regex_pattern = r'^\+\d{1,3}-\d{2,4}-\d{6,10}$'
```text
+---+-------------------+
| id|       phone_number|
+---+-------------------+
|  1| +1-202-5550137x123|
|  2|+44-20-79461234#ext|
|  3| +91-80-26541234abc|
|  4|     +1-408-1234567|
+---+-------------------+

+---+--------------+
| id|  phone_number|
+---+--------------+
|  4|+1-408-1234567|
+---+--------------+
```

# Keep only the rows whose phone_number matches regex_pattern, then print them.
matches_pattern = col("phone_number").rlike(regex_pattern)
df.select("*").filter(matches_pattern).show()

{{CODE_BLOCK_1}}