Azure Database for MySQL and R can be used together for data analysis – MySQL as database engine and R as statistical tool. When dealing with large datasets that potentially exceed the memory of your machine it is recommended to push the data into database engine, where you can query the data in smaller digestible chunks.
In this article we will learn how to use R to perform the following tasks:
# Install and load required packages ipak <- function(pkg){ new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])] if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE) sapply(pkg, require, character.only = TRUE) } packages <- c("AzureRMR", "RMySQL", "RMariaDB", "tidyverse", "curl", "ggplot2", "fun") ipak(packages) # Create Azure Database for MySQL using AzureRMR package subscriptionId <- "ffffffff-ffff-ffff-ffff-ffffffffffff" resourceGroup <- "test_group" location <- "southcentralus" mysqlUserName <- "azureuser" mysqlPassword <- random_password(length = 12, replace = FALSE, extended = FALSE) mysqlServerName <- "testserver" az <- create_azure_login() sub <- az$get_subscription(subscriptionId) rg <- sub$create_resource_group(resourceGroup, location) parameters <- jsonlite::toJSON(list( administratorLogin=list(value=mysqlUserName), administratorLoginPassword=list(value=mysqlPassword), location=list(value=location), serverName=list(value=mysqlServerName), skuCapacity=list(value=2), skuFamily=list(value="Gen5"), skuName=list(value="GP_Gen5_2"), skuSizeMB=list(value=5120), skuTier=list(value="GeneralPurpose"), version=list(value="5.7"), backupRetentionDays=list(value=7), geoRedundantBackup=list(value="Disabled") ), auto_unbox=TRUE) template <- "https://raw.githubusercontent.com/Azure/azure-mysql/master/arm-templates/ExampleWithFirewallRule/template.json" vm_tpl <- rg$deploy_template("myNewMySQLServer", template=template, parameters=parameters, wait=TRUE) # Connect to Azure Database for MySQL certName <- "BaltimoreCyberTrustRoot.crt" download.file(url = "https://www.digicert.com/CACerts/BaltimoreCyberTrustRoot.crt.pem", destfile = certName, mode='wb') # Using RMariaDB::MariaDB() instead of RMySQL::MySQL() due to a bug related to SSL in RMySQL package con <- dbConnect(RMariaDB::MariaDB(), host= paste0(mysqlServerName, ".mysql.database.azure.com"), dbname="mysql", user=paste0(mysqlUserName, "@", mysqlServerName), password=mysqlPassword, ssl.ca=certName, client.flag=2048) # create iris database irisTableName <- "iris" dbSendQuery(con, paste("CREATE DATABASE", irisTableName)) # connect to iris database con <- dbConnect(RMariaDB::MariaDB(), host= paste0(mysqlServerName, ".mysql.database.azure.com"), dbname="iris", user=paste0(mysqlUserName, "@", mysqlServerName), password=mysqlPassword, ssl.ca=certName, client.flag=2048) # create table iris and load data from a csv file write.table(iris, file="iris.csv", sep=",", row.names = FALSE, quote=FALSE) dbWriteTable(con, irisTableName, "iris.csv") dbReadTable(con, irisTableName) dbListFields(con, irisTableName) # query iris table using dplyr iristbl <- tbl(con, "iris") iristbl %>% group_by(Species) %>% summarize(count=n()) # show the query string for dplyr iristbl %>% group_by(Species) %>% summarize(count=n()) %>% show_query() # visualize data using ggplot irisTableData <- dbReadTable(con, irisTableName) ggplot(data=irisTableData, aes(x = Sepal.Length, y = Sepal.Width)) + geom_point(aes(color=Species, shape=Species)) + xlab("Sepal Length") + ylab("Sepal Width") + ggtitle("Sepal Length vs Width") # Cleanup dbRemoveTable(con, irisTableName) dbSendQuery(con, paste("DROP DATABASE", irisTableName)) dbDisconnect(con) rg$delete(confirm=FALSE) rm(list = ls(all.names = TRUE))
REFERENCES :
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.