The purpose of this project is to document how to securely automate the deployment of a self-hosted integration runtime (SHIR) for Azure Data Factory using Terraform.
According to the Microsoft documentation, a self-hosted integration runtime can run copy activities between a cloud data store and a data store in a private network. It can also dispatch transform activities against compute resources in an on-premises network or an Azure virtual network. Installing a self-hosted integration runtime requires an on-premises machine or a virtual machine inside a private network.
Automating this process requires an installation script that runs on the chosen virtual machine. Storing this script in a secure location and allowing your deployment to access it becomes difficult in a completely private and secure Azure Data Factory environment. The technique used here is to upload the script to a secure storage account and give the virtual machine private access to download the script from that same storage account.
Here you can see that an Azure DevOps pipeline runs on a self-hosted DevOps agent that is connected to the secure VNET of the target environment. This pipeline runs a Terraform deployment to set up a secure VNET, a private storage account, a virtual machine (for the SHIR), and an Azure Data Factory.
This script installs the SHIR gateway on a virtual machine and registers it with a specific Azure Data Factory. The only parameter needed is an authentication key for the gateway.
# Script parameter: the authentication key that is handed to Register-Gateway
# when the integration runtime is registered.
param(
    [string] $gatewayKey
)
# Logging setup: make sure the log directory exists, then start a fresh trace log.
$logLoc = "$env:SystemDrive\WindowsAzure\Logs\Plugins\Microsoft.Compute.CustomScriptExtension\"
$logDirExists = Test-Path -Path $logLoc
if (-not $logDirExists) {
    New-Item -ItemType Directory -Path $logLoc -Force
}
$logPath = "$logLoc\tracelog.log"
# Out-File without -Append replaces whatever an earlier run left in the file.
Out-File -FilePath $logPath -InputObject "Start to excute gatewayInstall.ps1. `n"
# Returns the current date and time as a "yyyy-MM-dd HH:mm:ss" string (used to stamp log lines).
function Now-Value() {
    $timestampFormat = "yyyy-MM-dd HH:mm:ss"
    return Get-Date -Format $timestampFormat
}
# Writes $msg and the current script stack trace to the trace log, then throws $msg.
function Throw-Error([string] $msg) {
    # Throwing and catching right away produces an error record whose
    # ScriptStackTrace can be logged before the error is raised for real.
    try {
        throw $msg
    }
    catch {
        $trace = $_.ScriptStackTrace
        $logEntry = "DMDTTP is failed: $msg`nStack:`n$trace"
        Trace-Log $logEntry
    }

    throw $msg
}
# Appends a timestamped line containing $msg to the file at $logPath.
function Trace-Log([string] $msg) {
    $stamp = Now-Value
    $line = "${stamp} $msg`n"
    try {
        Out-File -FilePath $logPath -InputObject $line -Append
    }
    catch {
        # Tracing is best-effort: an exception while writing the log is deliberately ignored.
    }
}
# Runs $process (optionally with $arguments), waits for it to exit and returns its
# trimmed standard output. Calls Throw-Error when the exit code is non-zero or
# Start-Process reported an error.
function Run-Process([string] $process, [string] $arguments) {
    Write-Verbose "Run-Process: $process $arguments"

    # stdout/stderr are captured through temp files named after this PowerShell process id.
    $errorFile = "$env:tmp\tmp$pid.err"
    $outFile = "$env:tmp\tmp$pid.out"
    foreach ($captureFile in @($outFile, $errorFile)) {
        "" | Out-File $captureFile
    }

    $errVariable = ""
    $startArgs = @{
        FilePath               = $process
        Wait                   = $true
        PassThru               = $true
        NoNewWindow            = $true
        RedirectStandardError  = $errorFile
        RedirectStandardOutput = $outFile
        ErrorVariable          = 'errVariable'
    }
    # -ArgumentList is only supplied when there is something to pass.
    if (-not [string]::IsNullOrEmpty($arguments)) {
        $startArgs['ArgumentList'] = $arguments
    }
    $proc = Start-Process @startArgs

    # A delimiter that never occurs makes Get-Content return each file as one string.
    $errContent = [string] (Get-Content -Path $errorFile -Delimiter "!!!DoesNotExist!!!")
    $outContent = [string] (Get-Content -Path $outFile -Delimiter "!!!DoesNotExist!!!")
    Remove-Item $errorFile
    Remove-Item $outFile

    $failed = ($proc.ExitCode -ne 0) -or ($errVariable -ne "")
    if ($failed) {
        Throw-Error "Failed to run process: exitCode=$($proc.ExitCode), errVariable=$errVariable, errContent=$errContent, outContent=$outContent."
    }

    Trace-Log "Run-Process: ExitCode=$($proc.ExitCode), output=$outContent"

    if (-not [string]::IsNullOrEmpty($outContent)) {
        return $outContent.Trim()
    }
    return $outContent
}
# Downloads the gateway installer from $url and saves it to $gwPath.
# Any failure is written to the trace log and then rethrown to the caller.
function Download-Gateway([string] $url, [string] $gwPath) {
    $client = $null
    try {
        # Turn non-terminating errors into exceptions so the catch block sees them.
        $ErrorActionPreference = "Stop";
        $client = New-Object System.Net.WebClient
        $client.DownloadFile($url, $gwPath)
        Trace-Log "Download gateway successfully. Gateway loc: $gwPath"
    }
    catch {
        Trace-Log "Fail to download gateway msi"
        Trace-Log $_.Exception.ToString()
        throw
    }
    finally {
        # WebClient is IDisposable; release it on success and on failure.
        if ($null -ne $client) {
            $client.Dispose()
        }
    }
}
# Silently installs the gateway MSI located at $gwPath.
# Calls Throw-Error when $gwPath is empty or does not exist.
function Install-Gateway([string] $gwPath) {
    if ([string]::IsNullOrEmpty($gwPath)) {
        Throw-Error "Gateway path is not specified"
    }
    if (!(Test-Path -Path $gwPath)) {
        Throw-Error "Invalid gateway path: $gwPath"
    }
    Trace-Log "Start Gateway installation"
    # Install the file that was just validated (quoted in case the path contains
    # spaces) rather than a hard-coded "gateway.msi" resolved against the working directory.
    Run-Process "msiexec.exe" "/i `"$gwPath`" INSTALLTYPE=AzureTemplate /quiet /norestart"
    # Fixed pause after the installer returns; presumably gives the freshly
    # installed service time to start — TODO confirm.
    Start-Sleep -Seconds 30
    Trace-Log "Installation of gateway is successful"
}
# Reads the value named $property from the registry key $keyPath.
# Returns an empty string when either the key or the property does not exist.
function Get-RegistryProperty([string] $keyPath, [string] $property) {
    Trace-Log "Get-RegistryProperty: Get $property from $keyPath"
    if (! (Test-Path $keyPath)) {
        Trace-Log "Get-RegistryProperty: $keyPath does not exist"
        # Stop here: calling Get-Item on a missing key would only raise an error.
        return ""
    }
    $keyReg = Get-Item $keyPath
    if (! ($keyReg.Property -contains $property)) {
        Trace-Log "Get-RegistryProperty: $property does not exist"
        return ""
    }
    return $keyReg.GetValue($property)
}
# Looks up the "DiacmdPath" value under the gateway's ConfigurationManager registry
# key and returns it. Calls Throw-Error when the value is missing or empty.
function Get-InstalledFilePath() {
    $configKey = "hklm:\Software\Microsoft\DataTransfer\DataManagementGateway\ConfigurationManager"
    $diacmdPath = Get-RegistryProperty $configKey "DiacmdPath"

    if ([string]::IsNullOrEmpty($diacmdPath)) {
        Throw-Error "Get-InstalledFilePath: Cannot find installed File Path"
    }

    Trace-Log "Gateway installation file: $diacmdPath"
    return $diacmdPath
}
# Registers the installed gateway by running the installed command-line tool twice:
# first with "-era 8060", then with "-k <instanceKey>".
# NOTE(review): "-era 8060" presumably enables remote access on port 8060 and "-k"
# supplies the authentication key — confirm against the dmgcmd documentation.
function Register-Gateway([string] $instanceKey) {
    Trace-Log "Register Agent"

    $diacmd = Get-InstalledFilePath
    foreach ($toolArgs in @("-era 8060", "-k $instanceKey")) {
        Run-Process $diacmd $toolArgs
    }

    Trace-Log "Agent registration is successful!"
}
# Entry point: download, install and register the integration runtime unless a
# "diahost" process is already running on this machine.
$runningHost = Get-Process "diahost" -ErrorAction SilentlyContinue
if ($null -ne $runningHost) {
    Trace-Log "Integration Runtime is already running. Skipping installation & configuration.";
}
else {
    Trace-Log "Integration Runtime is not running. Initiating Download - Install - Register sequence.";
    Trace-Log "Log file: $logLoc"

    $uri = "https://go.microsoft.com/fwlink/?linkid=839822"
    Trace-Log "Gateway download fw link: $uri"

    # The installer is saved into the current working directory.
    $gwPath = "$PWD\gateway.msi"
    Trace-Log "Gateway download location: $gwPath"

    Download-Gateway $uri $gwPath
    Install-Gateway $gwPath
    Register-Gateway $gatewayKey
}
# Azure Data Factory instance that the self-hosted integration runtime belongs to.
resource "azurerm_data_factory" "adf" {
  name                = "adf-poc-${random_string.random.result}"
  resource_group_name = azurerm_resource_group.rg.name

  # Use the shared location variable, as the storage account, private endpoints
  # and VM in this configuration do, instead of a hard-coded region.
  location = var.location
}
# Self-hosted integration runtime inside the data factory. Its auth_key_1 attribute
# is what the VM custom script extension passes to the install script as -gatewayKey.
# NOTE(review): newer azurerm provider releases are believed to drop
# resource_group_name on this resource in favour of data_factory_id alone —
# confirm against the pinned provider version.
resource "azurerm_data_factory_integration_runtime_self_hosted" "shir" {
  name                = "adf-poc-shir"
  data_factory_id     = azurerm_data_factory.adf.id
  resource_group_name = azurerm_resource_group.rg.name
}
The second resource creates the self-hosted integration runtime within Azure Data Factory. It is also needed because it exposes the authentication key used to create a gateway and connect a virtual machine to this runtime. This is the gateway key that we pass into our PowerShell script.
# Storage account that holds the SHIR installation script.
# NOTE(review): the CORS rule below allows every origin, header and listed method;
# confirm it is actually required, since the visible configuration only reads the
# blob from the VM.
resource "azurerm_storage_account" "storageaccount" {
  name                = "shirst${random_string.random.result}"
  location            = var.location
  resource_group_name = azurerm_resource_group.rg.name

  account_kind             = "StorageV2"
  account_tier             = "Standard"
  account_replication_type = "LRS"
  min_tls_version          = "TLS1_2"

  blob_properties {
    cors_rule {
      allowed_origins    = ["*"]
      allowed_methods    = ["DELETE", "GET", "HEAD", "MERGE", "POST", "OPTIONS", "PUT", "PATCH"]
      allowed_headers    = ["*"]
      exposed_headers    = ["*"]
      max_age_in_seconds = 200
    }
  }
}
# Private container that the install-script blob is uploaded into.
resource "azurerm_storage_container" "newcontainer" {
  name                  = "shir-script"
  container_access_type = "private"
  storage_account_name  = azurerm_storage_account.storageaccount.name
}
# Uploads the gateway install script as the blob "adf-shir.ps1".
resource "azurerm_storage_blob" "newblob" {
  name                   = "adf-shir.ps1"
  storage_account_name   = azurerm_storage_account.storageaccount.name
  storage_container_name = azurerm_storage_container.newcontainer.name
  type                   = "Block"
  access_tier            = "Cool"

  # Anchor the path to this module so the upload does not depend on the directory
  # Terraform happens to be invoked from.
  source = "${path.module}/../gatewayinstall.ps1"

  # Track the file's hash so that an edited script is detected and re-uploaded;
  # without it, changes to the local file are ignored after the first apply.
  content_md5 = filemd5("${path.module}/../gatewayinstall.ps1")
}
# Network rules for the storage account: deny by default, with no IP or subnet
# exceptions, while letting the listed Azure platform traffic bypass the rules.
resource "azurerm_storage_account_network_rules" "storageaccountnetworkrules" {
  storage_account_name = azurerm_storage_account.storageaccount.name
  resource_group_name  = azurerm_resource_group.rg.name

  default_action             = "Deny"
  bypass                     = ["Metrics", "Logging", "AzureServices"]
  ip_rules                   = []
  virtual_network_subnet_ids = []

  # Applied only after the script blob exists; presumably so the upload is not
  # blocked by the deny rule — confirm.
  depends_on = [
    azurerm_storage_blob.newblob
  ]
}
#--------Storage Account Private Endpoints and DNS A Records--------#
# DFS: private endpoint for the storage account's "dfs" sub-resource, registered
# in the DFS private DNS zone.
resource "azurerm_private_endpoint" "pe_000" {
  name                = "${azurerm_storage_account.storageaccount.name}-dfs"
  resource_group_name = azurerm_resource_group.rg.name
  location            = var.location
  subnet_id           = azurerm_subnet.pe.id

  private_dns_zone_group {
    name                 = azurerm_private_dns_zone.dfs_privatednszone.name
    private_dns_zone_ids = [azurerm_private_dns_zone.dfs_privatednszone.id]
  }

  private_service_connection {
    name                           = "${azurerm_storage_account.storageaccount.name}-connection"
    private_connection_resource_id = azurerm_storage_account.storageaccount.id
    subresource_names              = ["dfs"]
    is_manual_connection           = false
  }
}
# A record in the DFS private DNS zone, named after the DFS private endpoint and
# pointing at that endpoint's private IP address.
# NOTE(review): the endpoint already declares a private_dns_zone_group for this
# zone, so this record may be redundant — confirm before relying on it.
resource "azurerm_private_dns_a_record" "privatednsarecord-000" {
  name                = azurerm_private_endpoint.pe_000.name
  resource_group_name = azurerm_resource_group.rg.name
  zone_name           = azurerm_private_dns_zone.dfs_privatednszone.name
  ttl                 = "300"
  records             = [azurerm_private_endpoint.pe_000.private_service_connection[0].private_ip_address]

  depends_on = [azurerm_private_endpoint.pe_000]
}
# Blob: private endpoint for the storage account's "blob" sub-resource, registered
# in the blob private DNS zone.
resource "azurerm_private_endpoint" "pe_001" {
  name                = "${azurerm_storage_account.storageaccount.name}-blob"
  resource_group_name = azurerm_resource_group.rg.name
  location            = var.location
  subnet_id           = azurerm_subnet.pe.id

  private_dns_zone_group {
    name                 = azurerm_private_dns_zone.blob_privatednszone.name
    private_dns_zone_ids = [azurerm_private_dns_zone.blob_privatednszone.id]
  }

  private_service_connection {
    name                           = "${azurerm_storage_account.storageaccount.name}-connection"
    private_connection_resource_id = azurerm_storage_account.storageaccount.id
    subresource_names              = ["blob"]
    is_manual_connection           = false
  }
}
# A record in the blob private DNS zone, named after the blob private endpoint and
# pointing at that endpoint's private IP address.
# NOTE(review): the endpoint already declares a private_dns_zone_group for this
# zone, so this record may be redundant — confirm before relying on it.
resource "azurerm_private_dns_a_record" "privatednsarecord-001" {
  name                = azurerm_private_endpoint.pe_001.name
  resource_group_name = azurerm_resource_group.rg.name
  zone_name           = azurerm_private_dns_zone.blob_privatednszone.name
  ttl                 = "300"
  records             = [azurerm_private_endpoint.pe_001.private_service_connection[0].private_ip_address]

  depends_on = [azurerm_private_endpoint.pe_001]
}
# Generated administrator password for the SHIR VM, so that no credential is
# hard-coded in source control. The value is kept in the Terraform state.
resource "random_password" "shir_vm_admin" {
  length      = 24
  min_lower   = 1
  min_upper   = 1
  min_numeric = 1
  min_special = 1
}

# Windows VM that hosts the self-hosted integration runtime.
resource "azurerm_windows_virtual_machine" "main" {
  name                  = "shir-vm-${random_string.random.result}"
  location              = var.location
  resource_group_name   = azurerm_resource_group.rg.name
  network_interface_ids = [azurerm_network_interface.nic.id]
  size                  = "Standard_B1s"
  admin_username        = "testadmin"
  admin_password        = random_password.shir_vm_admin.result

  source_image_reference {
    publisher = "MicrosoftWindowsServer"
    offer     = "WindowsServer"
    sku       = "2019-Datacenter"
    version   = "latest"
  }

  os_disk {
    name                 = "myosdisk1"
    caching              = "ReadWrite"
    storage_account_type = "Standard_LRS"
  }

  identity {
    type = "SystemAssigned"
  }
}
# Custom Script Extension that downloads the install script from the storage
# account and runs it on the VM with the integration runtime's authentication key.
resource "azurerm_virtual_machine_extension" "vmextension-0000" {
  name                       = "ADF-SHIR"
  virtual_machine_id         = azurerm_windows_virtual_machine.main.id
  publisher                  = "Microsoft.Compute"
  type                       = "CustomScriptExtension"
  type_handler_version       = "1.10"
  auto_upgrade_minor_version = true

  # jsonencode builds the JSON document and escapes every value, instead of
  # splicing values into a hand-written JSON string.
  protected_settings = jsonencode({
    fileUris = [
      format(
        "https://%s.blob.core.windows.net/%s/%s",
        azurerm_storage_account.storageaccount.name,
        azurerm_storage_container.newcontainer.name,
        azurerm_storage_blob.newblob.name,
      ),
    ]
    commandToExecute = join(" ", [
      "powershell.exe -ExecutionPolicy Unrestricted -File",
      azurerm_storage_blob.newblob.name,
      "-gatewayKey ${azurerm_data_factory_integration_runtime_self_hosted.shir.auth_key_1}",
    ])
    storageAccountName = azurerm_storage_account.storageaccount.name
    storageAccountKey  = azurerm_storage_account.storageaccount.primary_access_key
  })

  # The storage account denies traffic by default, so the VM can only fetch the
  # script through the blob private endpoint; make sure it exists first.
  depends_on = [
    azurerm_private_endpoint.pe_001
  ]
}
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.