Scala Programming Guides › Scala Ecosystem › Scala Best Practices

Working with JSON, Databases, and Files | Scala Programming Guide

By Dmitri Meshin
Picture of the author
Published on
Working with JSON, Databases, and Files - Scala Programming Guide

JSON Serialization and Deserialization

Play JSON (Imperative Approach)

Play JSON uses explicit implicit formatters:

import java.time.Instant

import play.api.libs.json._

// Domain model
// Order is the aggregate; `createdAt` is a java.time.Instant, serialized
// on the wire as epoch milliseconds (see instantFormat below).
case class Order(
  id: String,
  customerId: String,
  items: List[LineItem],
  total: BigDecimal,
  createdAt: Instant
)

// One purchased product line within an Order.
case class LineItem(
  productId: String,
  quantity: Int,
  unitPrice: BigDecimal
)

// Explicit formatter for LineItem.
// FIX: Json.format[LineItem] derives camelCase keys (productId, unitPrice),
// but the sample payload below uses snake_case (product_id, unit_price),
// so macro derivation would fail to validate it. Map the keys explicitly.
implicit val lineItemFormat: Format[LineItem] = new Format[LineItem] {
  override def reads(json: JsValue): JsResult[LineItem] = for {
    productId <- (json \ "product_id").validate[String]
    quantity <- (json \ "quantity").validate[Int]
    unitPrice <- (json \ "unit_price").validate[BigDecimal]
  } yield LineItem(productId, quantity, unitPrice)

  override def writes(li: LineItem): JsValue = Json.obj(
    "product_id" -> li.productId,
    "quantity" -> li.quantity,
    "unit_price" -> li.unitPrice
  )
}

// Serialize java.time.Instant as epoch milliseconds (a JSON number),
// matching the `created_at` representation used by the API payloads.
implicit val instantFormat: Format[Instant] = Format[Instant](
  Reads(js => js.validate[Long].map(Instant.ofEpochMilli)),
  Writes(i => JsNumber(BigDecimal(i.toEpochMilli)))
)

// Hand-written Format for Order: the wire format uses snake_case keys while
// the case class uses camelCase fields, so macro derivation would emit the
// wrong keys.
// FIX (consistency): the original re-implemented the Long <-> Instant
// conversion inline even though the implicit instantFormat above already
// defines it; delegate to that single source of truth.
implicit val orderFormat: Format[Order] = new Format[Order] {
  override def reads(json: JsValue): JsResult[Order] = for {
    id <- (json \ "id").validate[String]
    customerId <- (json \ "customer_id").validate[String]
    items <- (json \ "items").validate[List[LineItem]]
    total <- (json \ "total").validate[BigDecimal]
    // Uses instantFormat's Reads (epoch millis) via implicit resolution.
    createdAt <- (json \ "created_at").validate[Instant]
  } yield Order(id, customerId, items, total, createdAt)

  override def writes(order: Order): JsValue = Json.obj(
    "id" -> order.id,
    "customer_id" -> order.customerId,
    "items" -> Json.toJson(order.items),
    "total" -> order.total,
    // Uses instantFormat's Writes, producing epoch millis exactly as before.
    "created_at" -> order.createdAt
  )
}

// Usage
// Json.parse builds the JsValue tree (throws JsonParseException on
// malformed input); validate applies the implicit Format and accumulates
// path-tagged errors instead of throwing.
val json = Json.parse("""
  {
    "id": "ORD-2024-001",
    "customer_id": "CUST-123",
    "items": [
      {"product_id": "PROD-1", "quantity": 2, "unit_price": 29.99}
    ],
    "total": 59.98,
    "created_at": 1704067200000
  }
""")

val order: JsResult[Order] = json.validate[Order]

// fold collapses the JsResult: first function handles the error branch,
// second handles the success branch — same behavior as a pattern match.
order.fold(
  errors =>
    errors.foreach { case (path, errs) =>
      println(s"Error at $path: ${errs.map(_.message).mkString(", ")}")
    },
  validOrder => {
    println(s"Order ID: ${validOrder.id}")
    val response = Json.toJson(validOrder)
  }
)

Circe (Functional, Type-Safe Approach)

Circe is purely functional with excellent error reporting:

import io.circe._
import io.circe.generic.semiauto._
import io.circe.parser._
import io.circe.syntax._

// Domain model (same shape as the Play example).
// NOTE(review): `Instant` is presumably java.time.Instant — it is not in
// this snippet's import block; confirm the import is present.
case class Order(
  id: String,
  customerId: String,
  items: List[LineItem],
  total: BigDecimal,
  createdAt: Instant
)

// One purchased product line within an Order.
case class LineItem(
  productId: String,
  quantity: Int,
  unitPrice: BigDecimal
)

// Explicit codec for LineItem.
// FIX: deriveCodec[LineItem] derives camelCase member names (productId,
// unitPrice), but the sample payload below uses snake_case keys
// (product_id, unit_price), so derivation would fail to decode it.
implicit val lineItemCodec: Codec[LineItem] = Codec.from(
  Decoder.instance { c =>
    for {
      productId <- c.get[String]("product_id")
      quantity <- c.get[Int]("quantity")
      unitPrice <- c.get[BigDecimal]("unit_price")
    } yield LineItem(productId, quantity, unitPrice)
  },
  Encoder.instance { li =>
    Json.obj(
      "product_id" -> li.productId.asJson,
      "quantity" -> li.quantity.asJson,
      "unit_price" -> li.unitPrice.asJson
    )
  }
)

// Custom codec for Order with snake_case field name mapping.
// FIX: circe's Codec has no `transform(decodeFn, encodeFn)` overload like
// the original used; the supported way to pair a hand-written decoder and
// encoder is Codec.from(Decoder.instance(...), Encoder.instance(...)).
implicit val orderCodec: Codec[Order] = {
  // Instants travel as epoch milliseconds (JSON numbers).
  implicit val instantCodec: Codec[Instant] = Codec.from(
    Decoder.decodeLong.map(Instant.ofEpochMilli),
    Encoder.encodeLong.contramap(_.toEpochMilli)
  )

  Codec.from(
    // Decoder: map snake_case JSON keys to camelCase fields.
    Decoder.instance { c =>
      for {
        id <- c.get[String]("id")
        customerId <- c.get[String]("customer_id")
        items <- c.get[List[LineItem]]("items")
        total <- c.get[BigDecimal]("total")
        createdAt <- c.get[Instant]("created_at")
      } yield Order(id, customerId, items, total, createdAt)
    },
    // Encoder: map camelCase fields back to snake_case JSON keys.
    Encoder.instance { order =>
      Json.obj(
        "id" -> order.id.asJson,
        "customer_id" -> order.customerId.asJson,
        "items" -> order.items.asJson,
        "total" -> order.total.asJson,
        "created_at" -> order.createdAt.asJson
      )
    }
  )
}

// Parsing and encoding
val jsonString = """
  {
    "id": "ORD-2024-001",
    "customer_id": "CUST-123",
    "items": [
      {"product_id": "PROD-1", "quantity": 2, "unit_price": 29.99}
    ],
    "total": 59.98,
    "created_at": 1704067200000
  }
"""

// Two-stage decode: parse checks syntax, .as[Order] checks the schema.
// Both stages live in Either, so they chain with flatMap.
val result: Either[Error, Order] = parse(jsonString).flatMap(_.as[Order])

// Either.fold: left-hand function handles failure, right-hand success —
// equivalent to the usual Left/Right pattern match.
result.fold(
  error => println(s"Failed to decode: ${error.message}"),
  order => {
    println(s"Successfully decoded order: ${order.id}")
    // Encode: convert back to pretty-printed JSON.
    val encoded = order.asJson.spaces2
  }
)

// Working with partial data (optional fields)
// Useful for PATCH-style payloads: fields absent from the JSON decode to
// None, and the defaults make the Scala-side construction ergonomic.
case class PartialOrder(
  id: String,
  customerId: String,
  items: Option[List[LineItem]] = None,
  notes: Option[String] = None
)

// Semi-automatic derivation handles Option fields out of the box.
implicit val partialOrderCodec: Codec[PartialOrder] = deriveCodec[PartialOrder]

Database Access with Slick (Functional Relational Mapping)

Slick provides type-safe, composable SQL queries in Scala. The key innovation is "Functional Relational Mapping"—you write queries using Scala collections-like syntax, and Slick generates optimized SQL behind the scenes. This combines the safety of type checking with the performance of direct SQL, eliminating an entire category of bugs: queries that fail at runtime.

Understanding Slick's query composition model is essential for building maintainable data access layers. Queries are composable values—you can build up complex queries from simpler ones using map, filter, flatMap, and other familiar functional operations. The magic happens when Slick lowers these Scala operations to SQL: it understands your query intent and generates efficient SQL without N+1 queries or Cartesian products (unless you explicitly write them).

The impedance mismatch between Scala's type system and SQL's schema manifests in subtle ways. Custom column types, handling NULL values, transaction management, and connection pooling all require careful attention. The action/effect model in Slick separates query definition from execution, allowing you to build complex data access patterns that compose cleanly. However, this also means you must understand when queries are executed: lazy composition vs. eager execution is a critical distinction.

import slick.jdbc.PostgresProfile.api._
import slick.lifted.{Tag, TableQuery}
import slick.sql.SqlProfile.ColumnOption.NotNull
import scala.concurrent.Future

// Define database schema as Scala classes
// Each def declares one column with its SQL name and options; `*` is the
// default projection that maps a row tuple to/from the User case class.
// NOTE(review): column[Instant] relies on Slick's java.time column mapping
// (Slick 3.3+) — confirm the project's Slick version.
class UsersTable(tag: Tag) extends Table[User](tag, "users") {
  def id = column[String]("id", O.PrimaryKey)
  def email = column[String]("email", O.Unique)  // backed by a UNIQUE constraint
  def name = column[String]("name")
  def createdAt = column[Instant]("created_at")
  def subscriptionTier = column[String]("subscription_tier")

  // Map table columns to User case class
  def * = (id, email, name, createdAt, subscriptionTier)
    <> ((User.apply _).tupled, User.unapply)
}

// Query object for convenient access
val users = TableQuery[UsersTable]

// Define foreign key relationships
class OrdersTable(tag: Tag) extends Table[Order](tag, "orders") {
  def id = column[String]("id", O.PrimaryKey)
  def userId = column[String]("user_id")
  def total = column[BigDecimal]("total")
  def createdAt = column[Instant]("created_at")

  // Default projection: row tuple <-> Order case class.
  def * = (id, userId, total, createdAt)
    <> ((Order.apply _).tupled, Order.unapply)

  // Foreign key to users table
  // ON DELETE CASCADE: deleting a user also deletes their orders in the DB.
  def userFk = foreignKey("user_fk", userId, users)(_.id, onDelete = ForeignKeyAction.Cascade)
}

val orders = TableQuery[OrdersTable]

// CRUD operations with composable queries.
//
// FIX: DBIOAction.map (used by listPaginated, findActiveUsers, usersByTier)
// requires an implicit ExecutionContext; the original had none in scope and
// would not compile. The public constructor is kept unchanged and an EC is
// supplied internally — in production, prefer injecting it instead of using
// the global pool.
class UserRepository(db: Database) {
  private implicit val ec: scala.concurrent.ExecutionContext =
    scala.concurrent.ExecutionContext.global

  // Create: insert one row; returns the affected-row count (1 on success).
  def insert(user: User): Future[Int] =
    db.run(users += user)

  // Read a user by primary key.
  def findById(id: String): Future[Option[User]] =
    db.run(users.filter(_.id === id).result.headOption)

  // Read a user by unique email.
  def findByEmail(email: String): Future[Option[User]] =
    db.run(users.filter(_.email === email).result.headOption)

  // Newest-first page of users (OFFSET/LIMIT in the generated SQL).
  def listPaginated(offset: Int, limit: Int): Future[List[User]] =
    db.run(
      users
        .sortBy(_.createdAt.desc)
        .drop(offset)
        .take(limit)
        .result
        .map(_.toList)
    )

  // Full-row update keyed by id; returns the affected-row count.
  def update(id: String, user: User): Future[Int] =
    db.run(users.filter(_.id === id).update(user))

  // Delete by id; returns the affected-row count.
  def delete(id: String): Future[Int] =
    db.run(users.filter(_.id === id).delete)

  // Users with at least one order in the last `daysActive` days.
  // The correlated EXISTS subquery lets Slick emit a single SQL statement.
  def findActiveUsers(daysActive: Int): Future[List[User]] = {
    // Fully qualified: java.time types are not imported in this snippet.
    val cutoffDate =
      java.time.Instant.now().minus(java.time.Duration.ofDays(daysActive))

    db.run(
      users
        .filter { u =>
          orders
            .filter(_.userId === u.id)
            .filter(_.createdAt > cutoffDate)
            .exists
        }
        .result
        .map(_.toList)
    )
  }

  // Count of users per subscription tier (GROUP BY in SQL).
  def usersByTier(): Future[Map[String, Int]] =
    db.run(
      users
        .groupBy(_.subscriptionTier)
        .map { case (tier, group) => (tier, group.length) }
        .result
        .map(_.toMap)
    )

  // Insert the user and its audit entry atomically: .transactionally wraps
  // both actions in one database transaction (both commit or both roll back).
  def insertWithAudit(user: User, auditLog: AuditEntry): Future[Int] =
    db.run(
      (users += user)
        .andThen(auditLogs += auditLog)
        .transactionally
    )
}

// Database configuration
// Reads the connection settings (url, driver, pool) from the "default"
// section of application.conf via Typesafe Config.
val db = Database.forConfig("default")  // Reads from application.conf

// Usage
// NOTE(review): the flatMap/map chain below needs an implicit
// ExecutionContext in scope; none is imported in this snippet — confirm
// the surrounding application provides one.
val userRepo = new UserRepository(db)

// Sample row; Instant.now() requires java.time.Instant in scope.
val newUser = User(
  id = "USR-001",
  email = "alice@example.com",
  name = "Alice Smith",
  createdAt = Instant.now(),
  subscriptionTier = "premium"
)

// Insert, then read the row back by its unique email.
userRepo.insert(newUser).flatMap { _ =>
  userRepo.findByEmail("alice@example.com")
}.map {
  case Some(user) => println(s"Created user: ${user.name}")
  case None => println("User not found")
}

Database Access with Doobie (Pure Functional JDBC)

Doobie provides purely functional JDBC without code generation. Rather than abstracting away SQL, Doobie encourages you to write SQL directly while gaining type safety and pure functional composition. The magic is how it combines static type checking with raw SQL. The query syntax allows Doobie's macro system to extract interpolated SQL fragments, type-check parameter placeholders against your Scala types, and generate appropriate JDBC calls. This approach has several advantages: the SQL is exactly what you write (no query translation surprises), it works with any SQL your database supports, and when performance matters you're never fighting an abstraction layer.

The monadic composition model in Doobie flows naturally from cats-effect: queries return ConnectionIO values that describe database operations without executing them. Composing queries using for-comprehensions is clean and readable. Transaction management is explicit and type-safe. Resource management (connections, result sets, prepared statements) is automatically handled through cats-effect's Resource type. However, you lose compile-time query validation—your schema changes won't be caught until runtime—so integration tests become critical.

import doobie._
import doobie.implicits._
import cats.effect.IO
import scala.concurrent.duration._

// Connection transactor (manages database connections)
// NOTE: fromDriverManager opens a fresh connection per transaction and
// blocks the calling thread — fine for demos and tests, but production
// code should use a pooled transactor (e.g. HikariTransactor). Credentials
// are hard-coded only for illustration; load them from configuration.
val xa = Transactor.fromDriverManager[IO](
  driver = "org.postgresql.Driver",
  url = "jdbc:postgresql://localhost:5432/mydb",
  user = "postgres",
  password = "password"
)

// SQL fragments with parameter binding (type-safe)
// Row shape for the users table; columns map positionally (id, email, name).
case class User(id: String, email: String, name: String)

object UserQueries {
  // Look up a single user; None when the id is absent.
  // Interpolated values ($id) become bound JDBC parameters, not string
  // concatenation, so these fragments are safe from SQL injection.
  def findById(id: String): ConnectionIO[Option[User]] =
    sql"SELECT id, email, name FROM users WHERE id = $id"
      .query[User]
      .option

  // Insert one user row; returns the affected-row count (1 on success).
  def insertUser(user: User): ConnectionIO[Int] =
    sql"INSERT INTO users (id, email, name) VALUES (${user.id}, ${user.email}, ${user.name})"
      .update
      .run

  // Page of users ordered by id, newest-style descending.
  def listPaginated(offset: Int, limit: Int): ConnectionIO[List[User]] =
    sql"SELECT id, email, name FROM users ORDER BY id DESC OFFSET $offset LIMIT $limit"
      .query[User]
      .to[List]

  // Count users whose email ends with the given domain. The LIKE pattern is
  // built in Scala and bound as one parameter.
  def countByDomain(emailDomain: String): ConnectionIO[Int] =
    sql"SELECT COUNT(*) FROM users WHERE email LIKE ${"%" + emailDomain}"
      .query[Int]
      .unique

  // Update a user's email; returns the affected-row count.
  def update(id: String, newEmail: String): ConnectionIO[Int] =
    sql"UPDATE users SET email = $newEmail WHERE id = $id"
      .update
      .run

  // Delete by primary key; returns the affected-row count.
  def deleteById(id: String): ConnectionIO[Int] =
    sql"DELETE FROM users WHERE id = $id".update.run

  // Join query.
  // FIX: the original used LEFT JOIN while decoding a non-optional Order.
  // A user with no orders yields NULL order columns, which fails to decode
  // into Order at runtime. An INNER JOIN matches the declared row type;
  // if users without orders must appear, decode (User, Option[Order]) instead.
  def findUserOrders(userId: String): ConnectionIO[List[(User, Order)]] =
    sql"""
      SELECT u.id, u.email, u.name, o.id, o.total, o.created_at
      FROM users u
      JOIN orders o ON u.id = o.user_id
      WHERE u.id = $userId
    """
      .query[(User, Order)]
      .to[List]

  // Monthly revenue totals, newest month first.
  // NOTE(review): decoding DATE_TRUNC's timestamp column into YearMonth
  // needs a Meta[YearMonth] instance in scope (doobie's java.time support
  // or a custom Meta) — confirm one exists in this project.
  def ordersByMonth(): ConnectionIO[List[(YearMonth, BigDecimal)]] =
    sql"""
      SELECT DATE_TRUNC('month', created_at), SUM(total)
      FROM orders
      GROUP BY DATE_TRUNC('month', created_at)
      ORDER BY DATE_TRUNC('month', created_at) DESC
    """
      .query[(YearMonth, BigDecimal)]
      .to[List]
}

// Transaction: atomicity guaranteed
// Both inserts belong to the same ConnectionIO program; when transacted,
// either both commit or both roll back.
def createUserWithAudit(user: User): ConnectionIO[Unit] =
  UserQueries
    .insertUser(user)
    .flatMap { _ =>
      sql"INSERT INTO audit_log (action, user_id) VALUES ('CREATE', ${user.id})"
        .update
        .run
    }
    .map(_ => ())

// Execute on IO
// transact acquires a connection from the transactor, runs the program in
// a transaction, and releases the connection.
val program: IO[Option[User]] = UserQueries.findById("USR-001").transact(xa)

// NOTE(review): on cats-effect 3, unsafeRunSync() needs an IORuntime in
// scope (`import cats.effect.unsafe.implicits.global`) — confirm against
// the project's cats-effect version.
program.unsafeRunSync() match {
  case Some(user) => println(s"Found: ${user.email}")
  case None => println("Not found")
}

// Custom type mappings for domain types
// Meta#imap derives a bidirectional column mapping from an existing one:
// Instant is stored as epoch millis (BIGINT), Money as its BigDecimal amount.
implicit val instantMeta: Meta[Instant] = Meta[Long].imap(Instant.ofEpochMilli)(_.toEpochMilli)
implicit val moneyMeta: Meta[Money] = Meta[BigDecimal].imap(Money(_))(_.amount)

File I/O

Reading and Writing Files

import scala.io.Source
import java.nio.file.{Files, Paths, StandardOpenOption}
import java.io.{PrintWriter, BufferedReader}
import cats.effect.{IO, Resource}
import fs2.{io, text}

// Simple file reading (string)
// Reads the whole file eagerly and joins its lines with '\n'; the source
// is always closed, even when reading throws.
def readFileSimple(path: String): String = {
  val source = Source.fromFile(path)
  try {
    val builder = new StringBuilder
    val lines = source.getLines()
    while (lines.hasNext) {
      builder.append(lines.next())
      if (lines.hasNext) builder.append('\n')
    }
    builder.toString
  } finally {
    source.close()
  }
}

// Functional file reading with Resource (automatic cleanup)
// Resource.make pairs acquisition with an explicit close action, so the
// source is released whether the read succeeds or fails.
def readFileLines(path: String): IO[List[String]] = {
  Resource
    .make(IO(Source.fromFile(path)))(src => IO(src.close()))
    .use(src => IO(src.getLines().toList))
}

// Stream-based reading (memory-efficient for large files)
// Emits one String per line without loading the whole file into memory.
// NOTE(review): this mixes fs2 API generations — `text.utf8.decode` is
// fs2 3.x, while `io.file.readAll(Path, chunkSize)` matches an older/
// deprecated signature (current fs2 3.x is `Files[IO].readAll`). Confirm
// against the project's fs2 version.
def readFileStream(path: String): fs2.Stream[IO, String] = {
  io.file
    .readAll(Paths.get(path), 8192)
    .through(text.utf8.decode)
    .through(text.lines)
}

// Writing files
// Suspends the blocking NIO write inside IO; by default Files.write
// creates the file or truncates an existing one.
def writeFile(path: String, content: String): IO[Unit] =
  IO {
    Files.write(Paths.get(path), content.getBytes("UTF-8"))
    ()
  }

// Appending to file
// CREATE makes the file if it is missing; APPEND writes at the end
// otherwise. A trailing newline terminates each appended line.
def appendFile(path: String, line: String): IO[Unit] = {
  val bytes = (line + "\n").getBytes("UTF-8")
  IO {
    Files.write(Paths.get(path), bytes, StandardOpenOption.APPEND, StandardOpenOption.CREATE)
    ()
  }
}

// Working with directories
// Returns the names of the regular files directly inside dirPath
// (subdirectories are excluded; order is filesystem-dependent).
// FIX: Files.list returns a java Stream backed by an open directory
// handle; the original never closed it, leaking the handle. It also mapped
// names to String twice. Convert through an iterator and close in finally.
def listFiles(dirPath: String): List[String] = {
  import scala.jdk.CollectionConverters._
  val stream = Files.list(Paths.get(dirPath))
  try {
    stream
      .iterator()
      .asScala
      .filter(p => Files.isRegularFile(p))
      .map(_.getFileName.toString)
      .toList
  } finally {
    stream.close()
  }
}

// Recursive directory traversal
// Emits the path of every regular file under dirPath as an fs2 Stream.
def walkDirectory(dirPath: String): fs2.Stream[IO, String] = {
  // Expand one path: a regular file emits itself, a directory emits the
  // traversal of each child, anything else (broken symlink, special file)
  // emits nothing.
  def go(path: java.nio.file.Path): fs2.Stream[IO, String] = {
    if (Files.isRegularFile(path)) {
      fs2.Stream(path.toString)
    } else if (Files.isDirectory(path)) {
      // bracket guarantees the directory stream is closed after its
      // children are materialized (Files.list holds an OS handle).
      // NOTE(review): the asInstanceOf cast relies on toArray boxing the
      // Path elements as AnyRef; a typed toArray(Array.ofDim) or
      // scala.jdk.CollectionConverters would avoid the unchecked cast.
      val children = IO(Files.list(path)).bracket(
        s => IO(s.toArray.toList.asInstanceOf[List[java.nio.file.Path]])
      )(s => IO(s.close()))

      fs2.Stream.eval(children).flatMap(fs2.Stream.emits(_)).flatMap(go)
    } else {
      fs2.Stream.empty
    }
  }

  go(Paths.get(dirPath))
}

// Safe file processing with error handling
// Streams the file line by line, running processLine for each; any failure
// is logged and the program completes normally (best-effort semantics, as
// in the original).
// FIX (idiom): the original used handleError with a bare println; the
// idiomatic cats-effect form suspends the recovery effect in IO via
// handleErrorWith, keeping the side effect inside the effect system.
def processLargeFile(path: String)(processLine: String => IO[Unit]): IO[Unit] = {
  readFileStream(path)
    .evalMap(processLine)
    .compile
    .drain
    .handleErrorWith { e =>
      IO(println(s"Error processing file: ${e.getMessage}"))
    }
}

// Example: count lines in a file
// FIX (idiom): the original folded inside the stream and then compiled with
// .head.map(_.getOrElse(0)) — the getOrElse branch was unreachable because
// Stream.fold always emits exactly one element. Folding at the compile
// stage returns the count directly, with no Option to unwrap.
def countLines(path: String): IO[Int] = {
  readFileStream(path)
    .compile
    .fold(0)((count, _) => count + 1)
}

Working with CSV

import com.github.tototoshi.csv._

// One CSV row: columns id, name, price, stock (header names must match).
case class Product(id: String, name: String, price: BigDecimal, stock: Int)

// Read CSV file
// Loads every data row into a Product, using the header row for column
// lookup; the reader is closed even if a row fails to convert.
// (BigDecimal(...) and .toInt throw on malformed values, as before.)
def readProductsCSV(path: String): List[Product] = {
  val reader = CSVReader.open(path)
  try {
    for (row <- reader.allWithHeaders()) yield Product(
      id = row("id"),
      name = row("name"),
      price = BigDecimal(row("price")),
      stock = row("stock").toInt
    )
  } finally {
    reader.close()
  }
}

// Write CSV file
// Emits a header row followed by one row per product; the writer is
// closed even when a row write fails.
def writeProductsCSV(path: String, products: List[Product]): Unit = {
  val writer = CSVWriter.open(path)
  try {
    writer.writeRow(Seq("id", "name", "price", "stock"))
    for (p <- products)
      writer.writeRow(Seq(p.id, p.name, p.price.toString(), p.stock.toString()))
  } finally {
    writer.close()
  }
}

// Stream-based CSV processing
// Copies the header, then rewrites each data row with a 10% price increase
// before writing it to the output file.
// FIX: the original called reader.iterator twice. Both iterators share the
// underlying parser state, so after take(1) consumed the header, the second
// iterator's drop(1) silently discarded the FIRST DATA ROW. Use a single
// iterator. Also nest the try blocks so the reader is closed even if
// opening the writer throws.
def processCSVStream(inputPath: String, outputPath: String): Unit = {
  val reader = CSVReader.open(inputPath)
  try {
    val writer = CSVWriter.open(outputPath)
    try {
      val rows = reader.iterator
      // Copy the header row, if the file is non-empty.
      if (rows.hasNext) writer.writeRow(rows.next())

      // Remaining rows are data rows.
      // NOTE(review): column index 2 is assumed to be the price column —
      // confirm against the input schema.
      rows
        .map(row => row.updated(2, (BigDecimal(row(2)) * 1.1).toString()))  // 10% price increase
        .foreach(writer.writeRow)
    } finally {
      writer.close()
    }
  } finally {
    reader.close()
  }
}

Working with XML

import scala.xml._

// Parse XML
// Sample document: two <order> elements, each carrying an id attribute.
val xmlString = """
  <orders>
    <order id="ORD-001">
      <customer>Alice</customer>
      <total>99.99</total>
    </order>
    <order id="ORD-002">
      <customer>Bob</customer>
      <total>149.99</total>
    </order>
  </orders>
"""

val root = XML.loadString(xmlString)

// Extract elements
val orders = (root \ "order")  // Direct children matching tag name
orders.foreach { order =>
  val id = (order \ "@id").text  // Get attribute
  val customer = (order \ "customer").text
  // NOTE(review): .toDouble throws NumberFormatException on malformed
  // input, and Double rounds monetary values — BigDecimal is safer here.
  val total = (order \ "total").text.toDouble
  println(s"Order $id: $customer - $$${total}")  // $$ renders a literal '$'
}

// Create XML
// Builds an <orders> document from the list using XML literals; braces
// splice Scala expressions into the literal. Whitespace inside the literal
// is preserved in the output, so the formatting here is deliberate.
def ordersToXML(orders: List[Order]): Elem = {
  <orders>
    {
      orders.map { o =>
        <order id={o.id}>
          <customer>{o.customerId}</customer>
          <total>{o.total.toString()}</total>
        </order>
      }
    }
  </orders>
}

// Write to file with an XML declaration, encoded as UTF-8.
val xml = ordersToXML(myOrders)
XML.save("orders.xml", xml, "UTF-8")

// Validate with XSD using the standard JAXP API (built into the JDK).
// FIX: the original imported com.sun.org.apache.xerces.internal.* — a JDK
// internal class that is unsupported and inaccessible under the module
// system (JDK 9+). javax.xml.* is the portable, supported entry point.
import javax.xml.XMLConstants
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.validation.SchemaFactory

val schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
val factory = DocumentBuilderFactory.newInstance()
factory.setNamespaceAware(true)
factory.setSchema(schemaFactory.newSchema(new java.io.File("orders.xsd")))
val docBuilder = factory.newDocumentBuilder()
val doc = docBuilder.parse("orders.xml")
// If valid, no exception is thrown. Attach an org.xml.sax.ErrorHandler to
// the builder to surface validation warnings instead of default stderr output.