spark, scala

Transpose data with Spark

A short user defined function written in Scala which allows you to transpose a dataframe without performing aggregation functions. To perform a transpose with aggregations, see the pivot method.

// Import the requisite methods
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{array, col, explode, lit, struct}

// Create a dataframe
val df = spark.createDataFrame(Seq(
  (1, 1, 2, 3, 8, 4, 5),
  (2, 4, 3, 8, 7, 9, 8),
  (3, 6, 1, 9, 2, 3, 6),
  (4, 7, 8, 6, 9, 4, 5),
  (5, 9, 2, 7, 8, 7, 3),
  (6, 1, 1, 4, 2, 8, 4)
)).toDF("uid", "col1", "col2", "col3", "col4", "col5", "col6")

df.show(10,false)

// Create the transpose user defined function.
// Imputs:
//   transDF: The dataframe which will be transposed
//   transBy: The column that the dataframe will be transposed by
// Outputs:
//   Dataframe datatype consisting of three columns:
//     transBy
//     column_name
//     column_value
def transposeUDF(transDF: DataFrame, transBy: Seq[String]): DataFrame = {
  val (cols, types) = transDF.dtypes.filter{ case (c, _) => !transBy.contains(c)}.unzip
  require(types.distinct.size == 1)      

  val kvs = explode(array(
    cols.map(c => struct(lit(c).alias("column_name"), col(c).alias("column_value"))): _*
  ))

  val byExprs = transBy.map(col(_))

  transDF
    .select(byExprs :+ kvs.alias("_kvs"): _*)
    .select(byExprs ++ Seq($"_kvs.column_name", $"_kvs.column_value"): _*)
}

transposeUDF(df, Seq("uid")).show(12,false)



Output:

df.show(10,false)
+---+----+----+----+----+----+----+
|uid|col1|col2|col3|col4|col5|col6|
+---+----+----+----+----+----+----+
|1  |1   |2   |3   |8   |4   |5   |
|2  |4   |3   |8   |7   |9   |8   |
|3  |6   |1   |9   |2   |3   |6   |
|4  |7   |8   |6   |9   |4   |5   |
|5  |9   |2   |7   |8   |7   |3   |
|6  |1   |1   |4   |2   |8   |4   |
+---+----+----+----+----+----+----+


transposeUDF(df, Seq("uid")).show(12,false) +---+-----------+------------+ |uid|column_name|column_value| +---+-----------+------------+ |1 |col1 |1 | |1 |col2 |2 | |1 |col3 |3 | |1 |col4 |8 | |1 |col5 |4 | |1 |col6 |5 | |2 |col1 |4 | |2 |col2 |3 | |2 |col3 |8 | |2 |col4 |7 | |2 |col5 |9 | |2 |col6 |8 | +---+-----------+------------+ only showing top 12 rows
Author image

About James Conner

Scuba dive master, wildlife photographer, anthropologist, programmer, electronics tinkerer and big data expert.
You've successfully subscribed to My Areas of Expertise
Great! Next, complete checkout for full access to My Areas of Expertise
Welcome back! You've successfully signed in.
Unable to sign you in. Please try again.
Success! Your account is fully activated, you now have access to all content.
Error! Stripe checkout failed.
Success! Your billing info is updated.
Error! Billing info update failed.