Chapter 34 Writing Data

What You’ll Learn:

  • Writing CSV and text files
  • Excel export
  • Common export errors
  • File permissions
  • Data serialization
  • Format preservation

Key Errors Covered: 18+ export errors

Difficulty: ⭐⭐ Intermediate

34.1 Introduction

Writing data is just as important as reading:

library(readr)
library(dplyr)

# Writing CSV
# write_csv(data, "output.csv")

34.2 Writing CSV Files

💡 Key Insight: Base R vs readr

# Create sample data
data <- mtcars[1:5, 1:5]

# Base R
write.csv(data, "base_output.csv")
# - Adds row names by default
# - Quotes strings
# - Slower

# readr
write_csv(data, "readr_output.csv")
# - No row names
# - Only quotes when needed
# - Faster
# - Better handling of special values

# Compare
cat("Base R file:\n")
#> Base R file:
cat(paste(head(readLines("base_output.csv"), 3), collapse = "\n"))
#> "","mpg","cyl","disp","hp","drat"
#> "Mazda RX4",21,6,160,110,3.9
#> "Mazda RX4 Wag",21,6,160,110,3.9

cat("\n\nreadr file:\n")
#> 
#> 
#> readr file:
cat(paste(head(readLines("readr_output.csv"), 3), collapse = "\n"))
#> mpg,cyl,disp,hp,drat
#> 21,6,160,110,3.9
#> 21,6,160,110,3.9

34.3 Error #1: cannot open the connection

⭐ BEGINNER 📁 PATH

34.3.1 The Error

# Try to write to non-existent directory
write_csv(mtcars, "nonexistent_dir/file.csv")
#> Error: Cannot open file for writing:
#> * 'nonexistent_dir/file.csv'

🔴 ERROR

Error: Cannot open file for writing:
'nonexistent_dir/file.csv'

34.3.2 What It Means

Directory doesn’t exist or no write permission.

34.3.3 Solutions

SOLUTION 1: Create Directory First

# Check if directory exists
output_dir <- "output"

if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
  cat("Created directory:", output_dir, "\n")
}
#> Created directory: output

# Now write
write_csv(mtcars, file.path(output_dir, "data.csv"))
cat("File written successfully\n")
#> File written successfully

SOLUTION 2: Safe Write Function

safe_write_csv <- function(data, path, ...) {
  # Get directory
  dir_path <- dirname(path)
  
  # Create if doesn't exist
  if (!dir.exists(dir_path)) {
    dir.create(dir_path, recursive = TRUE)
    message("Created directory: ", dir_path)
  }
  
  # Check write permission
  if (!file.access(dir_path, mode = 2) == 0) {
    stop("No write permission for directory: ", dir_path)
  }
  
  # Write file
  write_csv(data, path, ...)
  
  # Verify
  if (file.exists(path)) {
    message("Successfully wrote ", nrow(data), " rows to ", path)
  } else {
    stop("File was not created")
  }
  
  invisible(path)
}

# Test
safe_write_csv(mtcars, "test_output/cars.csv")
#> Created directory: test_output
#> Successfully wrote 32 rows to test_output/cars.csv

34.4 Writing Options

💡 Key Insight: Write Options

# Control output format
write_csv(mtcars, "formatted.csv",
          na = "MISSING",           # How to write NAs
          quote = "all")            # Quote all fields

# Append to existing file
write_csv(mtcars[1:5, ], "append_test.csv")
write_csv(mtcars[6:10, ], "append_test.csv", append = TRUE)

cat("Lines in file:", length(readLines("append_test.csv")), "\n")
#> Lines in file: 11

# Write with semicolon delimiter (European)
write_csv2(mtcars, "european.csv")

# Custom delimiter
write_delim(mtcars, "pipe_delim.txt", delim = "|")

# Tab-separated
write_tsv(mtcars, "tab_separated.txt")

34.5 Writing Excel Files

💡 Key Insight: writexl Package

library(writexl)

# Single sheet
write_xlsx(mtcars, "cars.xlsx")

# Multiple sheets
write_xlsx(
  list(
    Cars = mtcars,
    Iris = iris,
    Summary = data.frame(
      Dataset = c("mtcars", "iris"),
      Rows = c(nrow(mtcars), nrow(iris))
    )
  ),
  "multi_sheet.xlsx"
)

# Verify
library(readxl)
excel_sheets("multi_sheet.xlsx")
#> [1] "Cars"    "Iris"    "Summary"

34.6 Error #2: File Permission Denied

⭐⭐ INTERMEDIATE 💻 SYSTEM

34.6.1 The Problem

# File is open in Excel or locked
write_csv(data, "open_file.csv")
# Error: Permission denied

34.6.2 Solutions

SOLUTION 1: Check File Access

check_file_writable <- function(path) {
  # Check if file exists and is writable
  if (file.exists(path)) {
    if (file.access(path, mode = 2) == 0) {
      cat("File is writable\n")
      return(TRUE)
    } else {
      warning("File exists but is not writable (may be open)")
      return(FALSE)
    }
  } else {
    # Check if directory is writable
    dir_path <- dirname(path)
    if (file.access(dir_path, mode = 2) == 0) {
      cat("Directory is writable\n")
      return(TRUE)
    } else {
      warning("No write permission for directory")
      return(FALSE)
    }
  }
}

# Test
check_file_writable("test.csv")
#> Directory is writable
#> [1] TRUE

SOLUTION 2: Use Temporary File

# Write to temp file first
temp_file <- tempfile(fileext = ".csv")
write_csv(mtcars, temp_file)

# Then copy/move to destination
final_path <- "output/final.csv"
if (!dir.exists("output")) dir.create("output")

file.copy(temp_file, final_path, overwrite = TRUE)
#> [1] TRUE
cat("File written via temporary location\n")
#> File written via temporary location

34.7 Preserving Data Types

🎯 Best Practice: Preserve Types

library(lubridate)

# Create data with various types
complex_data <- tibble(
  id = 1:3,
  name = c("Alice", "Bob", "Charlie"),
  value = c(1.5, 2.3, 3.7),
  date = ymd("2024-01-01") + days(0:2),
  logical = c(TRUE, FALSE, TRUE),
  factor_col = factor(c("A", "B", "C"))
)

# CSV loses some type info
write_csv(complex_data, "types_csv.csv")
from_csv <- read_csv("types_csv.csv", show_col_types = FALSE)
str(from_csv)
#> spc_tbl_ [3 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
#>  $ id        : num [1:3] 1 2 3
#>  $ name      : chr [1:3] "Alice" "Bob" "Charlie"
#>  $ value     : num [1:3] 1.5 2.3 3.7
#>  $ date      : Date[1:3], format: "2024-01-01" "2024-01-02" ...
#>  $ logical   : logi [1:3] TRUE FALSE TRUE
#>  $ factor_col: chr [1:3] "A" "B" "C"
#>  - attr(*, "spec")=
#>   .. cols(
#>   ..   id = col_double(),
#>   ..   name = col_character(),
#>   ..   value = col_double(),
#>   ..   date = col_date(format = ""),
#>   ..   logical = col_logical(),
#>   ..   factor_col = col_character()
#>   .. )
#>  - attr(*, "problems")=<externalptr>

# RDS preserves everything
saveRDS(complex_data, "types_rds.rds")
from_rds <- readRDS("types_rds.rds")
str(from_rds)
#> tibble [3 × 6] (S3: tbl_df/tbl/data.frame)
#>  $ id        : int [1:3] 1 2 3
#>  $ name      : chr [1:3] "Alice" "Bob" "Charlie"
#>  $ value     : num [1:3] 1.5 2.3 3.7
#>  $ date      : Date[1:3], format: "2024-01-01" "2024-01-02" ...
#>  $ logical   : logi [1:3] TRUE FALSE TRUE
#>  $ factor_col: Factor w/ 3 levels "A","B","C": 1 2 3

# Excel preserves some types
write_xlsx(complex_data, "types_excel.xlsx")
from_excel <- read_excel("types_excel.xlsx")
str(from_excel)
#> tibble [3 × 6] (S3: tbl_df/tbl/data.frame)
#>  $ id        : num [1:3] 1 2 3
#>  $ name      : chr [1:3] "Alice" "Bob" "Charlie"
#>  $ value     : num [1:3] 1.5 2.3 3.7
#>  $ date      : POSIXct[1:3], format: "2024-01-01" "2024-01-02" ...
#>  $ logical   : logi [1:3] TRUE FALSE TRUE
#>  $ factor_col: chr [1:3] "A" "B" "C"

34.8 Large Data Export

🎯 Best Practice: Writing Large Files

# For very large data
library(data.table)
fwrite(large_data, "large_file.csv")

# Write in chunks
write_csv_chunked <- function(data, path, chunk_size = 10000) {
  n_chunks <- ceiling(nrow(data) / chunk_size)
  
  for (i in 1:n_chunks) {
    start <- (i - 1) * chunk_size + 1
    end <- min(i * chunk_size, nrow(data))
    
    chunk <- data[start:end, ]
    
    write_csv(chunk, path, 
              append = i > 1,  # Append after first chunk
              col_names = i == 1)  # Only write headers first time
    
    cat("Wrote chunk", i, "of", n_chunks, "\n")
  }
}

# Compressed output
library(readr)
write_csv(data, gzfile("data.csv.gz"))

34.9 R-Specific Formats

💡 Key Insight: R Binary Formats

# RDS - single object
saveRDS(mtcars, "mtcars.rds")
cars_rds <- readRDS("mtcars.rds")

# RData/rda - multiple objects
x <- 1:10
y <- "text"
save(x, y, mtcars, file = "data.RData")
rm(x, y, mtcars)
load("data.RData")  # Restores objects with original names

# Feather - fast, interoperable
library(arrow)
#> Error in library(arrow): there is no package called 'arrow'
write_feather(mtcars, "mtcars.feather")
#> Error in write_feather(mtcars, "mtcars.feather"): could not find function "write_feather"
cars_feather <- read_feather("mtcars.feather")
#> Error in read_feather("mtcars.feather"): could not find function "read_feather"

# Parquet - compressed, columnar
write_parquet(mtcars, "mtcars.parquet")
#> Error in write_parquet(mtcars, "mtcars.parquet"): could not find function "write_parquet"
cars_parquet <- read_parquet("mtcars.parquet")
#> Error in read_parquet("mtcars.parquet"): could not find function "read_parquet"

34.10 Error #3: Overwriting Files

⭐ BEGINNER ⚠️ SAFETY

34.10.1 The Problem

# Accidentally overwrite important file
# write_csv(new_data, "important_results.csv")

34.10.2 Solutions

SOLUTION: Safe Write with Backup

safe_write_with_backup <- function(data, path, backup = TRUE, ...) {
  # If file exists and backup requested
  if (file.exists(path) && backup) {
    # Create backup
    backup_path <- paste0(path, ".backup.", 
                          format(Sys.time(), "%Y%m%d_%H%M%S"))
    file.copy(path, backup_path)
    message("Created backup: ", basename(backup_path))
  }
  
  # Write new file
  write_csv(data, path, ...)
  message("Wrote file: ", path)
  
  invisible(path)
}

# Create test file
write_csv(mtcars[1:5, ], "important.csv")

# Safely overwrite
safe_write_with_backup(mtcars[1:10, ], "important.csv")
#> Created backup: important.csv.backup.20251026_154714
#> Wrote file: important.csv

# Check backups
list.files(pattern = "important")
#> [1] "important.csv"                       
#> [2] "important.csv.backup.20251026_154714"

34.11 Format Comparison

🎯 Best Practice: Choose Right Format

# Compare formats
compare_formats <- function(data) {
  formats <- list(
    CSV = function() write_csv(data, "test.csv"),
    RDS = function() saveRDS(data, "test.rds"),
    Excel = function() write_xlsx(data, "test.xlsx"),
    Feather = function() write_feather(data, "test.feather"),
    Parquet = function() write_parquet(data, "test.parquet")
  )
  
  results <- data.frame(
    Format = names(formats),
    Size_KB = numeric(length(formats)),
    Time_ms = numeric(length(formats))
  )
  
  for (i in seq_along(formats)) {
    # Time it
    time <- system.time(formats[[i]]())["elapsed"] * 1000
    
    # Get size
    files <- list.files(pattern = "^test\\.")
    size <- sum(file.info(files)$size) / 1024
    
    results$Size_KB[i] <- round(size, 2)
    results$Time_ms[i] <- round(time, 2)
    
    # Cleanup
    unlink(files)
  }
  
  results
}

# Test with mtcars
compare_formats(mtcars)
#> Error in write_feather(data, "test.feather"): could not find function "write_feather"
#> Timing stopped at: 0.001 0 0

34.12 Summary

Key Takeaways:

  1. Create directories first - Check with dir.exists()
  2. Use readr functions - write_csv(), not write.csv()
  3. Check permissions - Verify file access
  4. Preserve types - Use RDS for R objects
  5. Backup important files - Before overwriting
  6. Choose right format - CSV for sharing, RDS for R
  7. Handle large files - Use data.table or chunks

Quick Reference:

Format Function Best For
CSV write_csv() Sharing, Excel
RDS saveRDS() R objects, types
Excel write_xlsx() Multiple sheets
Feather write_feather() Fast, interoperable
Parquet write_parquet() Big data, compressed

Writing Functions:

# CSV
write_csv(data, "file.csv")
write_csv2(data, "european.csv")  # Semicolon
write_tsv(data, "file.txt")       # Tab
write_delim(data, "file.txt", "|")

# Excel
library(writexl)
write_xlsx(data, "file.xlsx")
write_xlsx(list(Sheet1 = data1, Sheet2 = data2), "file.xlsx")

# R formats
saveRDS(data, "file.rds")
save(obj1, obj2, file = "file.RData")

# Modern formats
library(arrow)
write_feather(data, "file.feather")
write_parquet(data, "file.parquet")

Best Practices:

# ✅ Good
Create directories before writing
Check file permissions
Use write_csv() not write.csv()
Backup before overwriting
Choose appropriate format
Compress large files
Test write with small sample first

# ❌ Avoid
Writing to non-existent directories
Overwriting without backup
Using write.csv() for new code
Writing very large files without chunks
Ignoring permission errors

34.13 Cleanup

#> [1] "important.csv.backup.20251026_154714"