# One-column sample rows; the Id sequence skips 4 and 5.
schema = "Id int"
data = [(value,) for value in (1, 2, 3, 6, 7, 8)]

# `spark` is the active SparkSession, provided outside this excerpt.
df = spark.createDataFrame(data, schema=schema)
df.show()
+---+
| Id|
+---+
|  1|
|  2|
|  3|
|  6|
|  7|
|  8|
+---+
# Reduce the whole DataFrame to a single row holding the smallest and the
# largest Id.
# NOTE(review): `min`/`max` must be the Spark column functions (e.g. from
# pyspark.sql.functions), not the Python builtins -- confirm against the
# imports, which are outside this excerpt.
df_list = df.agg(min("Id"), max("Id"))
df_list.show()
+-------+-------+
|min(Id)|max(Id)|
+-------+-------+
|      1|      8|
+-------+-------+
first()
# Fetch the single aggregate Row once and unpack it: every first() call is a
# separate Spark action, so reading each column through its own first() call
# would execute the min/max aggregation twice.
min_num, max_num = df_list.first()
# print(min_num)
# print(max_num)
1
8
# Build the complete, gap-free run of ids from min_num through max_num.
# range() excludes its end value, hence the + 1.
df_new = spark.range(start=min_num, end=max_num + 1)
df_new.show()
+---+
| id|
+---+
|  1|
|  2|
|  3|
|  4|
|  5|
|  6|
|  7|
|  8|
+---+
exceptAll()
# Keep the rows of the complete range that have no counterpart in df;
# what remains are the Ids missing from the original data.
df_res = df_new.exceptAll(other=df)
df_res.show()
+---+
| id|
+---+
|  5|
|  4|
+---+