Chapter 35 Database Connections
What You’ll Learn:
- Connecting to databases
- SQL queries from R
- Common database errors
- Connection management
- Best practices
Key Errors Covered: 15+ database errors
Difficulty: ⭐⭐⭐ Advanced
35.2 Database Basics
💡 Key Insight: DBI Package
library(DBI)
library(RSQLite)
# Create in-memory SQLite database
con <- dbConnect(RSQLite::SQLite(), ":memory:")
# Write data to database
dbWriteTable(con, "mtcars", mtcars)
dbWriteTable(con, "iris", iris)
# List tables
dbListTables(con)
#> [1] "iris" "mtcars"
# Get table info
dbListFields(con, "mtcars")
#> [1] "mpg" "cyl" "disp" "hp" "drat"
#> [6] "wt" "qsec" "vs" "am" "gear"
#> [11] "carb" "cyl_factor"
# Read entire table
data <- dbReadTable(con, "mtcars")
head(data, 3)
#> mpg cyl disp hp drat wt qsec vs am gear carb cyl_factor
#> 1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6
#> 2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 6
#> 3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 4
# Always disconnect when done
dbDisconnect(con)35.3 SQL Queries
💡 Key Insight: Running Queries
# Reconnect
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "mtcars", mtcars)
# Simple query
result <- dbGetQuery(con, "SELECT * FROM mtcars WHERE cyl = 4")
head(result, 3)
#> mpg cyl disp hp drat wt qsec vs am gear carb cyl_factor
#> 1 22.8 4 108.0 93 3.85 2.32 18.61 1 1 4 1 4
#> 2 24.4 4 146.7 62 3.69 3.19 20.00 1 0 4 2 4
#> 3 22.8 4 140.8 95 3.92 3.15 22.90 1 0 4 2 4
# Query with parameters (safe from SQL injection)
result <- dbGetQuery(
con,
"SELECT * FROM mtcars WHERE cyl = ? AND mpg > ?",
params = list(4, 25)
)
head(result)
#> mpg cyl disp hp drat wt qsec vs am gear carb cyl_factor
#> 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 4
#> 2 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 4
#> 3 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 4
#> 4 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 4
#> 5 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 4
#> 6 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 4
# Count rows
count <- dbGetQuery(con, "SELECT COUNT(*) as n FROM mtcars")
count$n
#> [1] 32
# Aggregation
summary <- dbGetQuery(con,
"SELECT cyl, AVG(mpg) as avg_mpg, COUNT(*) as count
FROM mtcars
GROUP BY cyl")
summary
#> cyl avg_mpg count
#> 1 4 26.66364 11
#> 2 6 19.74286 7
#> 3 8 15.10000 14
dbDisconnect(con)35.4 Error #1: could not connect to server
⭐⭐ INTERMEDIATE 🔌 CONNECTION
35.4.1 The Error
# Try to connect to non-existent server
con <- dbConnect(
RPostgreSQL::PostgreSQL(),
host = "nonexistent.server.com",
dbname = "mydb"
)
#> Error in (function (cond) : error in evaluating the argument 'drv' in selecting a method for function 'dbConnect': there is no package called 'RPostgreSQL'🔴 ERROR
Error: could not connect to server
35.4.2 Solutions
✅ SOLUTION: Safe Connection Function
safe_db_connect <- function(drv, ..., timeout = 5) {
# Try to connect with timeout
tryCatch({
con <- dbConnect(drv, ...)
# Test connection
if (dbIsValid(con)) {
message("Successfully connected to database")
return(con)
} else {
stop("Connection established but not valid")
}
}, error = function(e) {
stop("Failed to connect: ", e$message,
"\nCheck host, port, credentials, and network")
})
}
# Use it
con <- safe_db_connect(
RSQLite::SQLite(),
dbname = ":memory:"
)
#> Successfully connected to database
dbIsValid(con)
#> [1] TRUE
dbDisconnect(con)35.5 Error #2: table ... does not exist
⭐ BEGINNER 📊 SQL
35.5.1 The Error
con <- dbConnect(RSQLite::SQLite(), ":memory:")
# Try to query non-existent table
dbGetQuery(con, "SELECT * FROM nonexistent_table")
#> Error: no such table: nonexistent_table🔴 ERROR
Error: no such table: nonexistent_table
35.5.2 Solutions
✅ SOLUTION: Check Table Exists
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "mtcars", mtcars)
# Check if table exists
table_exists <- function(con, table_name) {
table_name %in% dbListTables(con)
}
# Safe query
safe_query <- function(con, sql, table_name = NULL) {
# Extract table name from SQL if not provided
if (is.null(table_name)) {
# Simple extraction (works for basic queries)
table_match <- regmatches(sql, regexpr("FROM\\s+(\\w+)", sql, ignore.case = TRUE))
if (length(table_match) > 0) {
table_name <- sub("FROM\\s+", "", table_match, ignore.case = TRUE)
}
}
# Check table exists
if (!is.null(table_name) && !table_exists(con, table_name)) {
stop("Table '", table_name, "' does not exist. Available tables: ",
paste(dbListTables(con), collapse = ", "))
}
# Run query
dbGetQuery(con, sql)
}
# Test
head(safe_query(con, "SELECT * FROM mtcars"), 3)
#> mpg cyl disp hp drat wt qsec vs am gear carb cyl_factor
#> 1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6
#> 2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 6
#> 3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 4
dbDisconnect(con)35.6 dplyr with Databases
💡 Key Insight: Using dplyr with Databases
library(dplyr)
library(dbplyr)
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "mtcars", mtcars)
# Create dplyr table reference
cars_tbl <- tbl(con, "mtcars")
# Use dplyr verbs (lazy evaluation)
result <- cars_tbl %>%
filter(cyl == 4) %>%
select(mpg, hp, wt) %>%
arrange(desc(mpg))
#> Error in select(., mpg, hp, wt): unused arguments (mpg, hp, wt)
# See the SQL that will be generated
result %>% show_query()
#> Error in UseMethod("show_query"): no applicable method for 'show_query' applied to an object of class "data.frame"
# Execute and collect results
data <- result %>% collect()
head(data)
#> mpg cyl disp hp drat wt qsec vs am gear carb cyl_factor
#> 1 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 4
#> 2 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 4
#> 3 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 4
#> 4 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 4
#> 5 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 4
#> 6 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 4
# Or compute and store in database
cars_tbl %>%
filter(mpg > 20) %>%
compute("efficient_cars")
#> Error in compute(., "efficient_cars"): unused argument ("efficient_cars")
dbListTables(con)
#> [1] "mtcars"
dbDisconnect(con)35.7 Connection Management
🎯 Best Practice: Manage Connections
# Pattern 1: Explicit disconnect
con <- dbConnect(RSQLite::SQLite(), ":memory:")
# ... do work ...
dbDisconnect(con)
# Pattern 2: Use on.exit (safer)
query_with_cleanup <- function() {
con <- dbConnect(RSQLite::SQLite(), ":memory:")
on.exit(dbDisconnect(con)) # Always disconnect
dbWriteTable(con, "test", mtcars)
return(dbGetQuery(con, "SELECT COUNT(*) FROM test"))
}
result <- query_with_cleanup()
result
#> COUNT(*)
#> 1 32
# Pattern 3: Connection pooling (for web apps)
# library(pool)
# pool <- dbPool(RSQLite::SQLite(), dbname = ":memory:")
# con <- poolCheckout(pool)
# # ... use connection ...
# poolReturn(con)
# poolClose(pool)35.8 Writing to Databases
💡 Key Insight: Writing Data
con <- dbConnect(RSQLite::SQLite(), ":memory:")
# Write entire data frame
dbWriteTable(con, "iris", iris)
# Append to existing table
dbWriteTable(con, "iris", iris[1:10, ], append = TRUE)
# Overwrite existing table
dbWriteTable(con, "iris", iris, overwrite = TRUE)
# Check row count
dbGetQuery(con, "SELECT COUNT(*) as count FROM iris")
#> count
#> 1 150
# Insert single row
dbExecute(con,
"INSERT INTO iris (Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species)
VALUES (?, ?, ?, ?, ?)",
params = list(5.1, 3.5, 1.4, 0.2, "setosa")
)
#> Error: near ".": syntax error
dbDisconnect(con)35.9 Prepared Statements
🎯 Best Practice: Use Parameters
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "mtcars", mtcars)
# ❌ BAD: SQL injection vulnerable
user_input <- "4 OR 1=1" # Malicious input
# query <- paste0("SELECT * FROM mtcars WHERE cyl = ", user_input)
# DON'T DO THIS!
# ✅ GOOD: Parameterized query
safe_query_by_cyl <- function(con, cyl_value) {
dbGetQuery(
con,
"SELECT * FROM mtcars WHERE cyl = ?",
params = list(cyl_value)
)
}
result <- safe_query_by_cyl(con, 4)
nrow(result)
#> [1] 11
dbDisconnect(con)35.10 Error #3: columns ... have mismatching types
⭐⭐ INTERMEDIATE 🔢 TYPE
35.10.2 Solutions
✅ SOLUTION: Match Types
# Check table schema
dbGetQuery(con, "PRAGMA table_info(test)")
#> cid name type notnull dflt_value pk
#> 1 0 id INTEGER 0 NA 0
#> 2 1 value REAL 0 NA 0
# Convert data to match
data_correct <- data.frame(
id = as.integer(1:3),
value = as.numeric(c(1.5, 2.3, 3.7))
)
dbWriteTable(con, "test", data_correct, append = TRUE)
# Verify
dbReadTable(con, "test")
#> Warning: Column `value`: mixed type, first seen values of type string, coercing
#> other values of type real
#> id value
#> 1 1 a
#> 2 2 b
#> 3 3 c
#> 4 1 1.5
#> 5 2 2.3
#> 6 3 3.7
dbDisconnect(con)35.11 Transactions
🎯 Best Practice: Use Transactions
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "accounts", data.frame(id = 1:3, balance = c(100, 200, 150)))
# Transaction ensures all-or-nothing
safe_transfer <- function(con, from_id, to_id, amount) {
# Start transaction
dbBegin(con)
tryCatch({
# Deduct from sender
dbExecute(con,
"UPDATE accounts SET balance = balance - ? WHERE id = ?",
params = list(amount, from_id))
# Add to receiver
dbExecute(con,
"UPDATE accounts SET balance = balance + ? WHERE id = ?",
params = list(amount, to_id))
# Commit if both succeed
dbCommit(con)
message("Transfer successful")
}, error = function(e) {
# Rollback on error
dbRollback(con)
stop("Transfer failed: ", e$message)
})
}
# Before
dbReadTable(con, "accounts")
#> id balance
#> 1 1 100
#> 2 2 200
#> 3 3 150
# Transfer
safe_transfer(con, from_id = 1, to_id = 2, amount = 50)
#> Transfer successful
# After
dbReadTable(con, "accounts")
#> id balance
#> 1 1 50
#> 2 2 250
#> 3 3 150
dbDisconnect(con)35.12 Summary
Key Takeaways:
- Use DBI package - Standard interface for all databases
- Always disconnect - Use on.exit() for safety
- Parameterize queries - Prevent SQL injection
- Check table exists - Before querying
- Use dplyr - For familiar syntax
- Manage connections - Pool for web apps
- Use transactions - For multiple operations
Quick Reference:
| Function | Purpose |
|---|---|
| dbConnect() | Connect to database |
| dbDisconnect() | Close connection |
| dbListTables() | List tables |
| dbReadTable() | Read entire table |
| dbWriteTable() | Write data frame |
| dbGetQuery() | Run SELECT query |
| dbExecute() | Run INSERT/UPDATE/DELETE |
| dbBegin/Commit/Rollback() | Transactions |
Database Operations:
# Connect
library(DBI)
con <- dbConnect(RSQLite::SQLite(), "database.db")
# List and inspect
dbListTables(con)
dbListFields(con, "table_name")
# Read data
data <- dbReadTable(con, "table_name")
data <- dbGetQuery(con, "SELECT * FROM table WHERE col = ?",
params = list(value))
# Write data
dbWriteTable(con, "table_name", data_frame)
dbExecute(con, "INSERT INTO table VALUES (?, ?)",
params = list(val1, val2))
# dplyr interface
library(dplyr)
tbl(con, "table_name") %>%
filter(col > 10) %>%
collect()
# Always disconnect
dbDisconnect(con)Best Practices:
# ✅ Good
Use parameterized queries
Always disconnect (use on.exit)
Check dbIsValid() before queries
Use dplyr for complex operations
Use transactions for multiple operations
Handle connection errors gracefully
# ❌ Avoid
Building SQL with paste() (SQL injection!)
Leaving connections open
Assuming table exists
Not handling connection errors