Hi!
In previous posts I shared how to host and chat with a Llama 2 model running locally with Ollama (view post). The next step was to learn how to integrate Semantic Kernel with OllamaSharp (NuGet package and repo).
OllamaSharp is a .NET binding for the Ollama API, making it easy to interact with Ollama using your favorite .NET languages.
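To get a feel for the library, here is a minimal sketch of a direct OllamaSharp call (the endpoint and model name are the same values used later in this post, and GetCompletion is the same call the services below rely on):

using OllamaSharp;

// quick test: point the client to a local Ollama endpoint and a model
var ollama = new OllamaApiClient("http://localhost:11434", "llama2");

// ask for a completion and print the generated text
var completion = await ollama.GetCompletion("Why is the sky blue?", null, CancellationToken.None);
Console.WriteLine(completion.Response);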
So I decided to try it and create Chat Completion and Text Generation implementations for Semantic Kernel using this library.
The full test is a console app using both services with Semantic Kernel.
The Text Generation Service is the easy one. Just implement the Microsoft.SemanticKernel.TextGeneration.ITextGenerationService interface; the resulting code looks like this:
// Copyright (c) 2024
// Author : Bruno Capuano
// Change Log :
// – Sample Text Generation Service for Ollama models
//
// The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using OllamaSharp;
using OllamaSharp.Models.Chat;

namespace sk_ollamacsharp
{
    public class OllamaTextGenerationService : Microsoft.SemanticKernel.TextGeneration.ITextGenerationService
    {
        // public properties for the model url endpoint and model name
        public string ModelUrl { get; set; }
        public string ModelName { get; set; }

        public IReadOnlyDictionary<string, object?> Attributes => throw new NotImplementedException();

        // streaming is not implemented in this sample
        public IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }

        public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            // create an OllamaSharp client pointing to the local endpoint and model
            var ollama = new OllamaApiClient(ModelUrl, ModelName);

            // ask Ollama for a completion and wrap the response as a TextContent
            var completionResponse = await ollama.GetCompletion(prompt, null, cancellationToken);
            TextContent stc = new TextContent(completionResponse.Response);
            return new List<TextContent> { stc };
        }
    }
}
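Used on its own, the service only needs the endpoint and model name. A quick sketch (the values are the same assumptions used in the console app later in this post):

var textSvc = new OllamaTextGenerationService
{
    ModelUrl = "http://localhost:11434",
    ModelName = "llama2"
};

// the service returns a list of TextContent items; print the last one
var contents = await textSvc.GetTextContentsAsync("Write a haiku about Toronto in January.");
Console.WriteLine(contents[^1].Text);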
The Chat Completion Service requires the implementation of the IChatCompletionService interface. The code looks like this:
// Copyright (c) 2024
// Author : Bruno Capuano
// Change Log :
// – Sample Chat Completion Service for Ollama models
//
// The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using OllamaSharp;
using OllamaSharp.Models.Chat;

namespace sk_ollamacsharp
{
    public class OllamaChatCompletionService : IChatCompletionService
    {
        // public properties for the model url endpoint and model name
        public string ModelUrl { get; set; }
        public string ModelName { get; set; }

        public IReadOnlyDictionary<string, object?> Attributes => throw new NotImplementedException();

        public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            // create an OllamaSharp client and a chat session for the local model
            var ollama = new OllamaApiClient(ModelUrl, ModelName);
            var chat = new Chat(ollama, _ => { });

            // forward the system messages from the chat history to the Ollama chat
            foreach (var message in chatHistory)
            {
                if (message.Role == AuthorRole.System)
                {
                    await chat.SendAs(ChatRole.System, message.Content);
                }
            }

            // send the last message in the history as the user question
            var lastMessage = chatHistory.LastOrDefault();
            string question = lastMessage.Content;

            var history = (await chat.Send(question, cancellationToken)).ToArray();
            var last = history.Last();
            var chatResponse = last.Content;

            // append the assistant reply to the history and return it
            chatHistory.AddAssistantMessage(chatResponse);
            return chatHistory;
        }

        // streaming is not implemented in this sample
        public IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }
    }
}
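Note that the service forwards the system messages, sends the last message in the history as the question, and appends the assistant reply to the history it receives. A small standalone sketch (the endpoint and model name are assumptions matching the console app below):

var chatSvc = new OllamaChatCompletionService
{
    ModelUrl = "http://localhost:11434",
    ModelName = "llama2"
};

var chatHistory = new ChatHistory();
chatHistory.AddSystemMessage("You are a helpful assistant.");
chatHistory.AddUserMessage("Why is the sky blue?");

// the returned list ends with the assistant reply appended by the service
var messages = await chatSvc.GetChatMessageContentsAsync(chatHistory);
Console.WriteLine(messages[^1].Content);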
With both services implemented, we can now use Semantic Kernel to work with them. The following console app registers both services and exercises each one:
// Copyright (c) 2024
// Author : Bruno Capuano
// Change Log :
// – Sample console application to use llama2 LLM running locally in Ubuntu with Semantic Kernel
//
// The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.TextGeneration;
using sk_ollamacsharp;

// llama2 running locally in Ubuntu (WSL)
var ollamaChat = new OllamaChatCompletionService();
ollamaChat.ModelUrl = "http://localhost:11434";
ollamaChat.ModelName = "llama2";

var ollamaText = new OllamaTextGenerationService();
ollamaText.ModelUrl = "http://localhost:11434";
ollamaText.ModelName = "llama2";

// semantic kernel builder
var builder = Kernel.CreateBuilder();
builder.Services.AddKeyedSingleton<IChatCompletionService>("ollamaChat", ollamaChat);
builder.Services.AddKeyedSingleton<ITextGenerationService>("ollamaText", ollamaText);
var kernel = builder.Build();

// text generation
var textGen = kernel.GetRequiredService<ITextGenerationService>();
var response = await textGen.GetTextContentsAsync("The weather in January in Toronto is usually ");
Console.WriteLine(response[^1].Text);

// chat
var chat = kernel.GetRequiredService<IChatCompletionService>();
var history = new ChatHistory();
history.AddSystemMessage("You are a useful assistant that replies using a funny style and emojis. Your name is Goku.");
history.AddUserMessage("hi, who are you?");

// print response
var result = await chat.GetChatMessageContentsAsync(history);
Console.WriteLine(result[^1].Content);
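Because the services are registered as keyed singletons, they can also be resolved by their keys when more than one implementation is registered. A small sketch (the keys match the ones used above):

// resolve specific keyed services instead of relying on the default selection
var keyedChat = kernel.GetRequiredService<IChatCompletionService>("ollamaChat");
var keyedText = kernel.GetRequiredService<ITextGenerationService>("ollamaText");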
The full code is available here: https://github.com/elbruno/semantickernel-localLLMs. The main README of the repo also still needs to be updated.
Happy coding!
Greetings
Bruno Capuano