Announcing general availability of Native Scoring using PREDICT function in Azure SQL Database

Published Mar 23 2019 06:30 PM 152 Views
First published on MSDN on Sep 25, 2017
Today we are announcing the general availability of the native PREDICT Transact-SQL function in Azure SQL Database. The PREDICT function allows you to perform scoring in real-time using certain RevoScaleR or revoscalepy models in a SQL query without invoking the R or Python runtime.

The PREDICT function support was added in SQL Server 2017. It is a table-valued function that takes a RevoScaleR or revoscalepy model & data (in the form of a table or view or query) as inputs and generates predictions based on the machine learning model. More details of the PREDICT function can be found here .
/* Step 1: Setup schema */
drop table if exists iris_data, iris_models;
go

create table iris_data (
id int not null identity primary key
, "Sepal.Length" float not null, "Sepal.Width" float not null
, "Petal.Length" float not null, "Petal.Width" float not null
, "Species" varchar(100) null
);

create table iris_models (
model_name varchar(30) not null primary key,
model varbinary(max) not null,
native_model varbinary(max) not null
);
go

/* Step 2: Populate test data from iris dataset in R */
insert into iris_data
("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species")
execute   sp_execute_external_script
@language = N'R'
, @script = N'iris_data <- iris;'
, @input_data_1 = N''
, @output_data_1_name = N'iris_data';
go

/* Step 3: Create procedure for training model */
create or alter procedure generate_iris_model
(@trained_model varbinary(max) OUTPUT, @native_trained_model varbinary(max) OUTPUT)
as
begin
execute sp_execute_external_script
@language = N'R'
, @script = N'
# Build decision tree model to predict species based on sepal/petal attributes
iris_model <- rxDTree(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data = iris_rx_data);

# Serialize model to binary format for storage in SQL Server
trained_model <- as.raw(serialize(iris_model, connection=NULL));

# Serialize model to native binary format for scoring using PREDICT function in SQL Server
native_trained_model <- rxSerializeModel(iris_model, realtimeScoringOnly = TRUE)
'
, @input_data_1 = N'
select "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"
from iris_data'
, @input_data_1_name = N'iris_rx_data'
, @params = N'
@trained_model varbinary(max) OUTPUT, @native_trained_model varbinary(max) OUTPUT'
, @trained_model = @trained_model OUTPUT
, @native_trained_model = @native_trained_model OUTPUT;
end;
go


/* Step 3: Train & store a decision tree model that will predict species of flowers */
declare @model varbinary(max), @native_model varbinary(max);
exec generate_iris_model @model OUTPUT, @native_model OUTPUT;

delete from iris_models where model_name = 'iris.dtree';
insert into iris_models (model_name, model, native_model) values('iris.dtree', @model, @native_model);

select model_name
, datalength(model)/1024. as model_size_kb
, datalength(native_model)/1024. as native_model_size_kb
from iris_models;
go

/* Step 4: Generate predictions using PREDICT function */
declare @native_model varbinary(max) =
(select native_model from iris_models where model_name = 'iris.dtree');
select p.*, d.Species as "Species.Actual", d.id
from PREDICT(MODEL = @native_model, DATA = dbo.iris_data as d)
with(setosa_Pred float, versicolor_Pred float, virginica_Pred float) as p;
go
%3CLINGO-SUB%20id%3D%22lingo-sub-386151%22%20slang%3D%22en-US%22%3EAnnouncing%20general%20availability%20of%20Native%20Scoring%20using%20PREDICT%20function%20in%20Azure%20SQL%20Database%3C%2FLINGO-SUB%3E%3CLINGO-BODY%20id%3D%22lingo-body-386151%22%20slang%3D%22en-US%22%3E%0A%20%26lt%3Bmeta%20http-equiv%3D%22Content-Type%22%20content%3D%22text%2Fhtml%3B%20charset%3DUTF-8%22%20%2F%26gt%3B%3CSTRONG%3EFirst%20published%20on%20MSDN%20on%20Sep%2025%2C%202017%20%3C%2FSTRONG%3E%3CBR%20%2F%3EToday%20we%20are%20announcing%20the%20general%20availability%20of%20the%20native%20PREDICT%20Transact-SQL%20function%20in%20Azure%20SQL%20Database.%20The%20PREDICT%20function%20allows%20you%20to%20perform%20scoring%20in%20real-time%20using%20certain%20RevoScaleR%20or%20revoscalepy%20models%26nbsp%3Bin%20a%20SQL%20query%20without%20invoking%20the%20R%20or%20Python%20runtime.%20%3CBR%20%2F%3E%20%3CBR%20%2F%3E%20The%20PREDICT%20function%20support%20was%20added%20in%20SQL%20Server%202017.%20It%20is%20a%20table-valued%20function%20that%20takes%20a%26nbsp%3BRevoScaleR%20or%20revoscalepy%26nbsp%3Bmodel%20%26amp%3B%20data%20(in%20the%20form%20of%20a%20table%20or%20view%20or%20query)%20as%20inputs%20and%20generates%20predictions%20based%20on%20the%20machine%20learning%20model.%20More%20details%20of%20the%20PREDICT%20function%20can%20be%20found%20%3CA%20href%3D%22https%3A%2F%2Fdocs.microsoft.com%2Fen-us%2Fsql%2Ft-sql%2Fqueries%2Fpredict-transact-sql%22%20target%3D%22_blank%22%20rel%3D%22noopener%20noreferrer%22%3Ehere%20%3C%2FA%3E.%20%3CBR%20%2F%3E%20%2F*%20Step%201%3A%20Setup%20schema%20*%2F%20%3CBR%20%2F%3Edrop%20table%20if%20exists%20iris_data%2C%20iris_models%3B%20%3CBR%20%2F%3Ego%20%3CBR%20%2F%3E%3CBR%20%2F%3Ecreate%20table%20iris_data%20(%20%3CBR%20%2F%3Eid%20int%20not%20null%20identity%20primary%20key%20%3CBR%20%2F%3E%2C%20%22Sepal.Length%22%20float%20not%20null%2C%20%22Sepal.Width%22%20float%20not%20null%20%3CBR%20%2F%3E%2C%20%22Petal.Length%22%20float%20not%20null%2C%20%22Petal.Width%22%20float%20not%20null%20%3CBR%20%2F%3E%2C%20%22Species%22%20varchar(100)%20null%20%3CBR%20%2F%3E)%3B%20%3CBR%20%2F%3E%3CBR%20%2F%3Ecreate%20table%20iris_models%20(%20%3CBR%20%2F%3Emodel_name%20varchar(30)%20not%20null%20primary%20key%2C%20%3CBR%20%2F%3Emodel%20varbinary(max)%20not%20null%2C%20%3CBR%20%2F%3Enative_model%20varbinary(max)%20not%20null%20%3CBR%20%2F%3E)%3B%20%3CBR%20%2F%3Ego%20%3CBR%20%2F%3E%3CBR%20%2F%3E%2F*%20Step%202%3A%20Populate%20test%20data%20from%20iris%20dataset%20in%20R%20*%2F%20%3CBR%20%2F%3Einsert%20into%20iris_data%20%3CBR%20%2F%3E(%22Sepal.Length%22%2C%20%22Sepal.Width%22%2C%20%22Petal.Length%22%2C%20%22Petal.Width%22%2C%20%22Species%22)%20%3CBR%20%2F%3Eexecute%26nbsp%3B%26nbsp%3B%20sp_execute_external_script%20%3CBR%20%2F%3E%40language%20%3D%20N'R'%20%3CBR%20%2F%3E%2C%20%40script%20%3D%20N'iris_data%20%26lt%3B-%20iris%3B'%20%3CBR%20%2F%3E%2C%20%40input_data_1%20%3D%20N''%20%3CBR%20%2F%3E%2C%20%40output_data_1_name%20%3D%20N'iris_data'%3B%20%3CBR%20%2F%3Ego%20%3CBR%20%2F%3E%3CBR%20%2F%3E%2F*%20Step%203%3A%20Create%20procedure%20for%20training%20model%20*%2F%20%3CBR%20%2F%3Ecreate%20or%20alter%20procedure%20generate_iris_model%20%3CBR%20%2F%3E(%40trained_model%20varbinary(max)%20OUTPUT%2C%20%40native_trained_model%20varbinary(max)%20OUTPUT)%20%3CBR%20%2F%3Eas%20%3CBR%20%2F%3Ebegin%20%3CBR%20%2F%3Eexecute%20sp_execute_external_script%20%3CBR%20%2F%3E%40language%20%3D%20N'R'%20%3CBR%20%2F%3E%2C%20%40script%20%3D%20N'%20%3CBR%20%2F%3E%23%20Build%20decision%20tree%20model%20to%20predict%20species%20based%20on%20sepal%2Fpetal%20attributes%20%3CBR%20%2F%3Eiris_model%20%26lt%3B-%20rxDTree(Species%20~%20Sepal.Length%20%2B%20Sepal.Width%20%2B%20Petal.Length%20%2B%20Petal.Width%2C%20data%20%3D%20iris_rx_data)%3B%20%3CBR%20%2F%3E%3CBR%20%2F%3E%23%20Serialize%20model%20to%20binary%20format%20for%20storage%20in%20SQL%20Server%20%3CBR%20%2F%3Etrained_model%20%26lt%3B-%20as.raw(serialize(iris_model%2C%20connection%3DNULL))%3B%20%3CBR%20%2F%3E%3CBR%20%2F%3E%23%20Serialize%20model%20to%20native%20binary%20format%20for%20scoring%20using%20PREDICT%20function%20in%20SQL%20Server%20%3CBR%20%2F%3Enative_trained_model%20%26lt%3B-%20rxSerializeModel(iris_model%2C%20realtimeScoringOnly%20%3D%20TRUE)%20%3CBR%20%2F%3E'%20%3CBR%20%2F%3E%2C%20%40input_data_1%20%3D%20N'%20%3CBR%20%2F%3Eselect%20%22Sepal.Length%22%2C%20%22Sepal.Width%22%2C%20%22Petal.Length%22%2C%20%22Petal.Width%22%2C%20%22Species%22%20%3CBR%20%2F%3Efrom%20iris_data'%20%3CBR%20%2F%3E%2C%20%40input_data_1_name%20%3D%20N'iris_rx_data'%20%3CBR%20%2F%3E%2C%20%40params%20%3D%20N'%20%3CBR%20%2F%3E%40trained_model%20varbinary(max)%20OUTPUT%2C%20%40native_trained_model%20varbinary(max)%20OUTPUT'%20%3CBR%20%2F%3E%2C%20%40trained_model%20%3D%20%40trained_model%20OUTPUT%20%3CBR%20%2F%3E%2C%20%40native_trained_model%20%3D%20%40native_trained_model%20OUTPUT%3B%20%3CBR%20%2F%3Eend%3B%20%3CBR%20%2F%3Ego%20%3CBR%20%2F%3E%3CBR%20%2F%3E%3CBR%20%2F%3E%2F*%20Step%203%3A%20Train%20%26amp%3B%20store%20a%20decision%20tree%20model%20that%20will%20predict%20species%20of%20flowers%20*%2F%20%3CBR%20%2F%3Edeclare%20%40model%20varbinary(max)%2C%20%40native_model%20varbinary(max)%3B%20%3CBR%20%2F%3Eexec%20generate_iris_model%20%40model%20OUTPUT%2C%20%40native_model%20OUTPUT%3B%20%3CBR%20%2F%3E%3CBR%20%2F%3Edelete%20from%20iris_models%20where%20model_name%20%3D%20'iris.dtree'%3B%20%3CBR%20%2F%3Einsert%20into%20iris_models%20(model_name%2C%20model%2C%20native_model)%20values('iris.dtree'%2C%20%40model%2C%20%40native_model)%3B%20%3CBR%20%2F%3E%3CBR%20%2F%3Eselect%20model_name%20%3CBR%20%2F%3E%2C%20datalength(model)%2F1024.%20as%20model_size_kb%20%3CBR%20%2F%3E%2C%20datalength(native_model)%2F1024.%20as%20native_model_size_kb%20%3CBR%20%2F%3Efrom%20iris_models%3B%20%3CBR%20%2F%3Ego%20%3CBR%20%2F%3E%3CBR%20%2F%3E%2F*%20Step%204%3A%20Generate%20predictions%20using%20PREDICT%20function%20*%2F%20%3CBR%20%2F%3Edeclare%20%40native_model%20varbinary(max)%20%3D%20%3CBR%20%2F%3E(select%20native_model%20from%20iris_models%20where%20model_name%20%3D%20'iris.dtree')%3B%20%3CBR%20%2F%3Eselect%20p.*%2C%20d.Species%20as%20%22Species.Actual%22%2C%20d.id%20%3CBR%20%2F%3Efrom%20PREDICT(MODEL%20%3D%20%40native_model%2C%20DATA%20%3D%20dbo.iris_data%20as%20d)%20%3CBR%20%2F%3Ewith(setosa_Pred%20float%2C%20versicolor_Pred%20float%2C%20virginica_Pred%20float)%20as%20p%3B%20%3CBR%20%2F%3Ego%20%3CBR%20%2F%3E%3C%2FLINGO-BODY%3E%3CLINGO-TEASER%20id%3D%22lingo-teaser-386151%22%20slang%3D%22en-US%22%3EFirst%20published%20on%20MSDN%20on%20Sep%2025%2C%202017%20Today%20we%20are%20announcing%20the%20general%20availability%20of%20the%20native%20PREDICT%20Transact-SQL%20function%20in%20Azure%20SQL%20Database.%3C%2FLINGO-TEASER%3E%3CLINGO-LABS%20id%3D%22lingo-labs-386151%22%20slang%3D%22en-US%22%3E%3CLINGO-LABEL%3ESQLServerStorageEngine%3C%2FLINGO-LABEL%3E%3C%2FLINGO-LABS%3E
Version history
Last update:
‎Nov 09 2020 09:44 AM
Updated by: