spark, scala

Renaming All Columns In A Spark DataFrame

Here's an easy example of how to rename all columns in an Apache Spark DataFrame. Technically, we're really creating a second DataFrame with the correct names; the original DataFrame is left unchanged.

// IMPORT DEPENDENCIES
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{SQLContext, Row, DataFrame, Column}
import org.apache.spark.ml.feature.VectorAssembler


// Create the example DataFrame without explicit column names.
// Each tuple is one row; because no names are supplied, Spark assigns the
// default tuple-field names _1 through _7 to the columns.
// createDataFrame already returns a DataFrame, so no extra .toDF() is needed.
val firstDF = spark.createDataFrame(Seq(
  (1, 1, 2, 3, 8, 4, 5),
  (2, 4, 3, 8, 7, 9, 8),
  (3, 6, 1, 9, 2, 3, 6),
  (4, 7, 8, 6, 9, 4, 5),
  (5, 9, 2, 7, 8, 7, 3),
  (6, 1, 1, 4, 2, 8, 4)
))
firstDF: org.apache.spark.sql.DataFrame = [_1: int, _2: int ... 5 more fields]


// Print the schema of the unnamed DataFrame; printSchema() is side-effecting, so keep the parentheses.
firstDF.printSchema()
root
 |-- _1: integer (nullable = false)
 |-- _2: integer (nullable = false)
 |-- _3: integer (nullable = false)
 |-- _4: integer (nullable = false)
 |-- _5: integer (nullable = false)
 |-- _6: integer (nullable = false)
 |-- _7: integer (nullable = false)


// Replacement column names, listed in the order they will be applied.
val colNames: Seq[String] = List(
  "uid",
  "col1", "col2", "col3",
  "col4", "col5", "col6"
)
colNames: Seq[String] = List(uid, col1, col2, col3, col4, col5, col6)


// toDF(names: _*) returns a new DataFrame with the columns renamed by position;
// firstDF is not modified. The `: _*` expands the Seq into varargs.
// NOTE: Spark requires the number of names to match the number of columns.
val secondDF = firstDF.toDF(colNames: _*)
secondDF: org.apache.spark.sql.DataFrame = [uid: int, col1: int ... 5 more fields]


// Print the schema of the renamed DataFrame; printSchema() is side-effecting, so keep the parentheses.
secondDF.printSchema()
root
 |-- uid: integer (nullable = false)
 |-- col1: integer (nullable = false)
 |-- col2: integer (nullable = false)
 |-- col3: integer (nullable = false)
 |-- col4: integer (nullable = false)
 |-- col5: integer (nullable = false)
 |-- col6: integer (nullable = false)


// Display up to 6 rows of the renamed DataFrame as a text table.
secondDF.show(6)
+---+----+----+----+----+----+----+
|uid|col1|col2|col3|col4|col5|col6|
+---+----+----+----+----+----+----+
|  1|   1|   2|   3|   8|   4|   5|
|  2|   4|   3|   8|   7|   9|   8|
|  3|   6|   1|   9|   2|   3|   6|
|  4|   7|   8|   6|   9|   4|   5|
|  5|   9|   2|   7|   8|   7|   3|
|  6|   1|   1|   4|   2|   8|   4|
+---+----+----+----+----+----+----+
Author image

About James Conner

Scuba dive master, wildlife photographer, anthropologist, programmer, electronics tinkerer and big data expert.