// File: MainForm.cs
|
|
using AI.Client;
|
|
using AI.Client.SCL;
|
|
using NAudio.Wave;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.Drawing;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Net.Http;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Threading.Tasks;
|
|
using System.Windows.Forms;
|
|
using Vosk;
|
|
using static System.Net.Mime.MediaTypeNames;
|
|
|
|
namespace VoiceAssistantPoC_Win
|
|
{
|
|
public partial class MainForm : Form
|
|
{
|
|
// Client used to talk to the AI bridge; created during form load.
private AiClient _aiClient;
// Parses AI replies and dispatches registered SCL commands; created during form load.
private SclProcessor _sclProcessor;
// Id of the instance that prompts are currently sent to (null/empty when none is selected).
private string _targetInstanceId;

// True while a request is in flight. Written by SetUiState and read in the audio callback,
// hence volatile. NOTE(review): assumes the NAudio callback runs off the UI thread - confirm.
private volatile bool _isProcessing = false;
// Handles the "Computer" edge case: set when only the wake word was heard, so the next
// finalized utterance is treated as the command. Touched by the audio callback and UI handlers.
private volatile bool _waitingForCommand = false;
// True when the current target is treated as an OpenAI API instance (input is then sent un-encoded).
private bool _isOpenAI_Instance = false;
// Set while the session combo box is being rebuilt so its SelectedIndexChanged handler ignores the churn.
private bool _isFetchingSessions = false;

// Session Tracking
// Instance id -> whether the system prompt has already been injected into that session.
private readonly Dictionary<string, bool> _initializedSessions = new Dictionary<string, bool>();
// Instance id -> platform name reported by the admin endpoint.
private readonly Dictionary<string, string> _sessionPlatforms = new Dictionary<string, string>();

// Vosk & NAudio components
private Model _voskModel;
private VoskRecognizer _recognizer;
private WaveInEvent _waveIn;
|
|
|
|
/// <summary>
/// Entry shown in the session combo box: pairs an instance id with its label.
/// </summary>
private class SessionItem
{
    /// <summary>Instance identifier.</summary>
    public string Id { get; set; }

    /// <summary>Text rendered for this entry.</summary>
    public string DisplayText { get; set; }

    /// <summary>Returns <see cref="DisplayText"/> so list controls render the label.</summary>
    public override string ToString()
    {
        return DisplayText;
    }
}
|
|
|
|
/// <summary>
/// Creates the form and builds its designer-generated controls.
/// </summary>
public MainForm() => InitializeComponent();
|
|
|
|
/// <summary>
/// Load handler: prints the banner, clears the live transcript and runs the start-up
/// sequence (clients, SCL command registration, session discovery, speech recognition)
/// on a pool thread. Any start-up failure is written to the log as a fatal error.
/// </summary>
private async void MainForm_Load(object sender, EventArgs e)
{
    LogToUI("=== Synapse OS Assistant PoC (WinForms Edition) ===", Color.White);
    lblLiveTranscript.Text = "";

    // Run initialization in background so UI doesn't freeze
    await Task.Run(() => StartUpAsync());

    async Task StartUpAsync()
    {
        try
        {
            // 1. Initialize Clients
            _aiClient = new AiClient("http://localhost:8080/api");
            _sclProcessor = new SclProcessor();

            // 2. Register the 'cmd' command handler
            _sclProcessor.RegisterCommand("cmd", HandleCmdCommand);

            // 3. Fetch available sessions and initialize the first one
            await FetchSessionsAsync();

            bool noSession = string.IsNullOrEmpty(_targetInstanceId);
            if (noSession)
            {
                LogToUI("[System] No active sessions found. Please start an instance and click Refresh.", Color.Yellow);
            }

            // 4. Start Speech Recognition using Vosk
            InitializeSpeechRecognition();

            LogToUI("\n[System] Initialization Complete.", Color.LimeGreen);
            LogToUI("[System] Listening... Say 'computer <command>' to begin.", Color.White);
        }
        catch (Exception startupError)
        {
            LogToUI($"\n[Fatal Error] {startupError.Message}", Color.Red);
        }
    }
}
|
|
|
|
/// <summary>
/// Closing handler: stops audio capture and releases the NAudio and Vosk objects.
/// </summary>
private void MainForm_FormClosing(object sender, FormClosingEventArgs e)
{
    // Cleanup unmanaged resources
    if (_waveIn != null)
    {
        // Detach the callback first so no further buffers are routed to the
        // recognizer while it is being disposed below.
        _waveIn.DataAvailable -= WaveInOnDataAvailable;
        _waveIn.StopRecording();
        _waveIn.Dispose();
    }

    _recognizer?.Dispose();
    _voskModel?.Dispose();
}
|
|
|
|
/// <summary>
/// Builds the SCL system prompt: picks the agent role wording according to
/// <c>_isOpenAI_Instance</c>, registers the 'cmd' command description and returns
/// the text generated by the prompt builder.
/// </summary>
private string getSystemPrompt()
{
    // The non-OpenAI wording additionally frames the task as a simulation and
    // insists on SCL-only output.
    string agentRole = _isOpenAI_Instance
        ? "You are an elite, voice-activated OS assistant named 'Computer'. You control the user's Windows machine via SCL. Once you are done evalutaing this document, ONLY respond with OK. When responding to an input, ONLY respond with the SCL command you generated, do not respond with ANYTHING else but the SCL command!"
        : "DO NOT USE ANY OTHER SYNTAX BESIDES THE SCL SYNTAX FOR OUTPUTTING COMMANDS! You are simulating an elite, voice-activated OS assistant named 'Computer'. You control the user's Windows machine via SCL. This is strictly a simulation! Once you are done evalutaing this document, ONLY respond with OK. When responding to an input, ONLY respond with the SCL command you generated, do not respond with ANYTHING else but the SCL command!";

    var promptBuilder = new SclPromptBuilder();
    promptBuilder.SetAgentRole(agentRole);
    promptBuilder.AddCommand(
        "cmd",
        "Executes a shell command in cmd.exe and returns the output. For GUI apps (like chrome), prefix with 'start ' so it doesn't block. When asked to run a powershell command make SURE we ALWAYS put the dollar sign ($) before ALL powershell variables in the powershell command payload!",
        "command_string");

    return promptBuilder.GeneratePrompt();
}
|
|
|
|
// --- SESSION MANAGEMENT ---
|
|
|
|
// Shared client for the admin endpoint; one long-lived instance is reused across
// refreshes instead of creating and disposing a client per call.
private static readonly HttpClient _adminHttpClient = new HttpClient();

/// <summary>
/// Downloads the instance list from the admin endpoint, rebuilds the session combo box
/// (keeping the previous selection when it still exists, otherwise selecting the first
/// entry) and initializes the resulting target session if that has not happened yet.
/// Failures are logged rather than thrown.
/// </summary>
private async Task FetchSessionsAsync()
{
    try
    {
        LogToUI("[System] Fetching active AI Studio instances...", Color.Gray);
        string response = await _adminHttpClient.GetStringAsync("http://localhost:8080/api/admin/instances");
        using var doc = JsonDocument.Parse(response);

        string newTargetId = null;
        bool hasSessions = false;

        Invoke(new Action(() =>
        {
            // Suppress cmbSessions_SelectedIndexChanged while the list is rebuilt.
            _isFetchingSessions = true;
            try
            {
                string previousSelection = _targetInstanceId;
                cmbSessions.Items.Clear();
                _sessionPlatforms.Clear();

                foreach (var prop in doc.RootElement.EnumerateObject())
                {
                    string id = prop.Name;

                    // Fall back to "Unknown" when the entry is not an object or carries
                    // no string-valued "platform" member.
                    string platform = "Unknown";
                    if (prop.Value.ValueKind == JsonValueKind.Object
                        && prop.Value.TryGetProperty("platform", out var p)
                        && p.ValueKind == JsonValueKind.String)
                    {
                        platform = p.GetString();
                    }

                    _sessionPlatforms[id] = platform;
                    cmbSessions.Items.Add(new SessionItem { Id = id, DisplayText = $"{platform} - {id}" });
                }

                hasSessions = cmbSessions.Items.Count > 0;
                if (hasSessions)
                {
                    var itemToSelect = cmbSessions.Items.Cast<SessionItem>().FirstOrDefault(i => i.Id == previousSelection);
                    if (itemToSelect != null)
                    {
                        cmbSessions.SelectedItem = itemToSelect;
                    }
                    else
                    {
                        cmbSessions.SelectedIndex = 0;
                        itemToSelect = (SessionItem)cmbSessions.SelectedItem;
                    }

                    newTargetId = itemToSelect.Id;
                }
            }
            finally
            {
                // Always re-arm the selection handler, even if rebuilding the list failed.
                _isFetchingSessions = false;
            }
        }));

        if (!hasSessions)
        {
            // Nothing is running any more: drop the stale target so commands are not
            // sent to an instance that no longer exists.
            _targetInstanceId = null;
            return;
        }

        if (newTargetId != null && newTargetId != _targetInstanceId)
        {
            _targetInstanceId = newTargetId;

            bool alreadyInitialized = _initializedSessions.TryGetValue(newTargetId, out bool ready) && ready;
            if (!alreadyInitialized)
            {
                SetUiState(true);
                try
                {
                    await InitializeAiSession(newTargetId);
                }
                finally
                {
                    // Unlock the UI even when initialization throws.
                    SetUiState(false);
                }
            }
            else
            {
                // Same bookkeeping as a manual switch to an initialized session:
                // keep the encoding flag in step with the new target.
                _isOpenAI_Instance = newTargetId.Contains("openai")
                    || (_sessionPlatforms.TryGetValue(newTargetId, out string knownPlatform) && knownPlatform == "OpenAI API");
            }
        }
    }
    catch (Exception ex)
    {
        LogToUI($"[Error] Failed to fetch sessions: {ex.Message}", Color.Red);
    }
}
|
|
|
|
/// <summary>
/// Injects the SCL system prompt into the given instance and marks it as initialized.
/// Gemini/ChatGPT instances get a fresh chat and a Base64-encoded prompt, OpenAI API
/// instances receive the prompt as system instructions, and any other instance gets a
/// fresh chat and the plain-text prompt. Also updates <c>_isOpenAI_Instance</c>.
/// </summary>
/// <param name="instanceId">Id of the instance to initialize.</param>
private async Task InitializeAiSession(string instanceId)
{
    LogToUI($"[System] Initializing Session: {instanceId}...", Color.Gray);

    // Single lookup; platform stays null when the instance is not in the platform map.
    _sessionPlatforms.TryGetValue(instanceId, out string platform);

    bool isOpenAI = instanceId.Contains("openai") || platform == "OpenAI API";
    _isOpenAI_Instance = isOpenAI;

    bool isGeminiOrChatGPT =
        instanceId.Contains("gem") || platform == "Google Gemini" ||
        instanceId.Contains("cgpt") || platform == "ChatGPT";

    if (!isGeminiOrChatGPT && isOpenAI)
    {
        LogToUI("[System] Generating and injecting SCL System Instructions...", Color.Gray);
        await _aiClient.SetOpenAISystemInstructionsAsync(getSystemPrompt());
    }
    else
    {
        await _aiClient.NewChatAsync(instanceId);

        // Wait for the UI to stabilize after the reset to prevent Angular state race conditions
        await Task.Delay(1000);

        LogToUI("[System] Generating and injecting SCL System Instructions...", Color.Gray);

        string prompt = getSystemPrompt();
        if (isGeminiOrChatGPT)
        {
            // UTF-8 yields the same bytes as ASCII for ASCII text but does not replace
            // other characters with '?' before encoding.
            prompt = Convert.ToBase64String(Encoding.UTF8.GetBytes(prompt));
        }

        await _aiClient.SendPromptAsync(prompt, instanceId);
    }

    LogToUI("[System] AI Session Initialized and Ready.", Color.LimeGreen);
    _initializedSessions[instanceId] = true;
}
|
|
|
|
/// <summary>
/// Loads the Vosk model from the "model" folder, creates the recognizer and the
/// microphone capture object, and starts recording when voice mode is checked.
/// </summary>
/// <exception cref="DirectoryNotFoundException">The "model" folder does not exist.</exception>
private void InitializeSpeechRecognition()
{
    // Capture format and recognizer must agree on the sample rate.
    const int sampleRate = 16000;
    string modelPath = "model";

    if (!Directory.Exists(modelPath))
    {
        throw new DirectoryNotFoundException($"Vosk model folder not found at: {Path.GetFullPath(modelPath)}\nPlease download 'vosk-model-small-en-us-0.15', extract it, and rename the folder to 'model' in your build output directory.");
    }

    LogToUI("[System] Loading Vosk Model (this may take a moment)...", Color.Gray);

    Vosk.Vosk.SetLogLevel(-1);

    _voskModel = new Model(modelPath);
    _recognizer = new VoskRecognizer(_voskModel, (float)sampleRate);

    _waveIn = new WaveInEvent
    {
        DeviceNumber = 0,
        WaveFormat = new WaveFormat(sampleRate, 1)
    };
    _waveIn.DataAvailable += WaveInOnDataAvailable;

    // This method is invoked from a pool thread during start-up, so read the
    // check box state on the UI thread.
    bool voiceModeEnabled = InvokeRequired
        ? (bool)Invoke(new Func<bool>(() => chkVoiceMode.Checked))
        : chkVoiceMode.Checked;

    if (voiceModeEnabled)
    {
        _waveIn.StartRecording();
        LogToUI("[System] Microphone initialized successfully.", Color.LimeGreen);
    }
}
|
|
|
|
// --- VOSK AUDIO PROCESSING ---
|
|
|
|
/// <summary>
/// Audio callback: feeds the captured buffer to the recognizer and forwards either
/// the final or the partial JSON result. Buffers are dropped while a request is
/// being processed.
/// </summary>
private void WaveInOnDataAvailable(object sender, WaveInEventArgs e)
{
    if (_isProcessing)
    {
        return;
    }

    // AcceptWaveform returning true selects the final result, false the partial one.
    bool utteranceComplete = _recognizer.AcceptWaveform(e.Buffer, e.BytesRecorded);
    string resultJson = utteranceComplete
        ? _recognizer.Result()
        : _recognizer.PartialResult();

    ProcessVoskResult(resultJson, isFinal: utteranceComplete);
}
|
|
|
|
/// <summary>
/// Handles one recognizer result. Partial results only update the live transcript.
/// Final results are logged and, unless a request is already running, checked for the
/// wake word: "computer" alone arms the next utterance as the command, while
/// "computer &lt;command&gt;" executes the command immediately.
/// </summary>
/// <param name="json">JSON produced by the recognizer ("text" or "partial" member).</param>
/// <param name="isFinal">True for a finalized utterance, false for a partial one.</param>
private void ProcessVoskResult(string json, bool isFinal)
{
    const string wakeWord = "computer";

    try
    {
        using (JsonDocument doc = JsonDocument.Parse(json))
        {
            if (!isFinal)
            {
                string partial = doc.RootElement.GetProperty("partial").GetString();
                if (!string.IsNullOrWhiteSpace(partial))
                {
                    UpdateLiveTranscript($"[Live]: {partial}");
                }
                return;
            }

            UpdateLiveTranscript(""); // Clear live transcript

            string text = doc.RootElement.GetProperty("text").GetString();
            if (string.IsNullOrWhiteSpace(text)) return;

            LogToUI($"[Mic Debug] Finalized: '{text}'", Color.DarkGray);

            if (_isProcessing) return;

            // EDGE CASE FIX: If we were waiting for a command because the last batch was just "computer"
            if (_waitingForCommand)
            {
                _waitingForCommand = false;
                ExecuteCommand(EncodeForSession(text));
                return;
            }

            // Check for wake word (same case-insensitive comparison in both branches).
            if (string.Equals(text, wakeWord, StringComparison.OrdinalIgnoreCase))
            {
                _waitingForCommand = true;
                LogToUI("[System] Wake word detected. Listening for command...", Color.Cyan);
            }
            else if (text.StartsWith(wakeWord + " ", StringComparison.OrdinalIgnoreCase))
            {
                // Send only what follows the wake word, matching what the
                // waiting-for-command path above sends.
                string commandText = text.Substring(wakeWord.Length).Trim();
                if (!string.IsNullOrWhiteSpace(commandText))
                {
                    ExecuteCommand(EncodeForSession(commandText));
                }
            }
        }
    }
    catch (Exception ex)
    {
        LogToUI($"[Error] Failed to parse Vosk JSON: {ex.Message}", Color.Red);
    }

    // OpenAI instances receive the text as-is; every other instance receives it
    // Base64-encoded. UTF-8 keeps non-ASCII characters instead of turning them into '?'.
    string EncodeForSession(string spokenText)
    {
        return _isOpenAI_Instance
            ? spokenText
            : Convert.ToBase64String(Encoding.UTF8.GetBytes(spokenText));
    }
}
|
|
|
|
// --- UI EVENT HANDLERS ---
|
|
|
|
/// <summary>
/// Selection handler for the session combo box: makes the chosen instance the target
/// and initializes it on first use. Ignored while the list is being rebuilt.
/// </summary>
private async void cmbSessions_SelectedIndexChanged(object sender, EventArgs e)
{
    if (_isFetchingSessions) return;

    if (!(cmbSessions.SelectedItem is SessionItem selected)) return;

    if (_targetInstanceId == selected.Id) return;

    string instanceId = selected.Id;
    _targetInstanceId = instanceId;

    bool alreadyInitialized = _initializedSessions.TryGetValue(instanceId, out bool ready) && ready;
    if (alreadyInitialized)
    {
        _isOpenAI_Instance = instanceId.Contains("openai")
            || (_sessionPlatforms.TryGetValue(instanceId, out string platform) && platform == "OpenAI API");
        LogToUI($"[System] Switched to initialized session: {instanceId}", Color.LimeGreen);
        return;
    }

    SetUiState(true);
    try
    {
        await InitializeAiSession(instanceId);
    }
    catch (Exception ex)
    {
        // This is an async void handler: an escaping exception would be unhandled,
        // so report the failure in the log instead.
        LogToUI($"[Error] Failed to initialize session {instanceId}: {ex.Message}", Color.Red);
    }
    finally
    {
        SetUiState(false);
    }
}
|
|
|
|
/// <summary>
/// Refresh button handler: locks the UI, reloads the session list and unlocks the UI again.
/// </summary>
private async void btnRefreshSessions_Click(object sender, EventArgs e)
{
    SetUiState(true);
    try
    {
        await FetchSessionsAsync();
    }
    finally
    {
        // Restore the controls even if the refresh unexpectedly throws.
        SetUiState(false);
    }
}
|
|
|
|
/// <summary>
/// Voice-mode toggle handler: enables the manual input controls in text mode (unless a
/// request is running) and starts or stops microphone capture accordingly.
/// </summary>
private void chkVoiceMode_CheckedChanged(object sender, EventArgs e)
{
    bool isVoiceMode = chkVoiceMode.Checked;

    if (!_isProcessing)
    {
        txtManualInput.Enabled = !isVoiceMode;
        btnSend.Enabled = !isVoiceMode;
    }

    if (!isVoiceMode)
    {
        _waveIn?.StopRecording();
        UpdateLiveTranscript("");
        _waitingForCommand = false;
        LogToUI("[System] Switched to Text Mode. Microphone paused.", Color.Yellow);
        return;
    }

    try
    {
        _waveIn?.StartRecording();
        LogToUI("[System] Switched to Voice Mode. Microphone active.", Color.Cyan);
    }
    catch (Exception ex)
    {
        // NOTE(review): StartRecording is understood to throw when the previous capture
        // has not finished stopping yet or no input device is available - confirm against
        // the NAudio version in use. Log instead of letting the UI handler fail.
        LogToUI($"[Error] Could not start the microphone: {ex.Message}", Color.Red);
    }
}
|
|
|
|
/// <summary>
/// Send button handler: submits the trimmed manual input as a command unless it is
/// empty or a request is already running. The text is Base64-encoded for every
/// instance that is not an OpenAI API instance.
/// </summary>
private void btnSend_Click(object sender, EventArgs e)
{
    string input = txtManualInput.Text.Trim();
    if (string.IsNullOrWhiteSpace(input) || _isProcessing)
    {
        return;
    }

    txtManualInput.Clear();

    // UTF-8 preserves typed non-ASCII characters; ASCII encoding would replace
    // them with '?' before the Base64 step.
    string finalText = _isOpenAI_Instance
        ? input
        : Convert.ToBase64String(Encoding.UTF8.GetBytes(input));

    ExecuteCommand(finalText);
}
|
|
|
|
/// <summary>
/// Re-initialize button handler: injects the system prompt into the current target
/// session again. Does nothing when no session is selected.
/// </summary>
private async void reInitSessionBtn_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(_targetInstanceId)) return;

    SetUiState(true);
    try
    {
        await InitializeAiSession(_targetInstanceId);
    }
    catch (Exception ex)
    {
        // async void handler: report the failure instead of letting it go unhandled.
        LogToUI($"[Error] Failed to re-initialize session: {ex.Message}", Color.Red);
    }
    finally
    {
        SetUiState(false);
    }
}
|
|
|
|
/// <summary>
/// Key handler for the manual input box: Enter triggers the Send button and the key
/// press is suppressed; every other key is left untouched.
/// </summary>
private void txtManualInput_KeyDown(object sender, KeyEventArgs e)
{
    bool enterPressed = e.KeyCode == Keys.Enter;
    if (!enterPressed)
    {
        return;
    }

    e.SuppressKeyPress = true;
    btnSend.PerformClick();
}
|
|
|
|
// --- CORE EXECUTION LOGIC ---
|
|
|
|
/// <summary>
/// Logs the command, locks the UI and runs the AI interaction loop on a pool thread.
/// Logs an error and returns when no target session is selected.
/// </summary>
/// <param name="commandText">Text to send to the AI (already encoded by the caller where required).</param>
private void ExecuteCommand(string commandText)
{
    if (string.IsNullOrEmpty(_targetInstanceId))
    {
        LogToUI("[System] No active session selected!", Color.Red);
        return;
    }

    LogToUI($"\n[User] {commandText}", Color.White);
    SetUiState(true);

    // Offload to background task to keep UI responsive
    _ = Task.Run(async () =>
    {
        try
        {
            await ProcessInteractionLoop(commandText);
        }
        finally
        {
            // Nobody observes this task, so make sure the UI is unlocked even if
            // the loop fails unexpectedly.
            SetUiState(false);
        }

        LogToUI("\n[System] Ready for next command.", Color.LimeGreen);
    });
}
|
|
|
|
/// <summary>
/// Sends the input to the AI, logs the reply, runs it through the SCL processor and
/// feeds any non-empty SCL result back to the AI as the next input. Stops when the
/// processor yields nothing or when an exception occurs (which is logged).
/// </summary>
/// <param name="initialInput">First text to send; nothing happens when it is null or empty.</param>
private async Task ProcessInteractionLoop(string initialInput)
{
    string pending = initialInput;

    while (true)
    {
        if (string.IsNullOrEmpty(pending))
        {
            return;
        }

        try
        {
            LogToUI("[System] Sending to AI...", Color.Gray);
            var reply = await _aiClient.SendPromptAsync(pending, _targetInstanceId);

            // Strip the fixed disclaimer text from the reply before it is logged and parsed.
            string aiText = reply.Output.Replace(@"AI responses may include mistakes. For financial advice, consult a professional. [Learn more](https://support.google.com/websearch?p=aimode)", string.Empty);
            LogToUI($"\n[AI] {aiText}\n", Color.Orange);

            string sclResult = await _sclProcessor.ProcessTextAsync(aiText);
            if (string.IsNullOrEmpty(sclResult))
            {
                return;
            }

            LogToUI($"[System] SCL Execution Result: {sclResult}", Color.Cyan);
            pending = sclResult;
        }
        catch (Exception loopError)
        {
            LogToUI($"[Error] Interaction loop failed: {loopError.Message}", Color.Red);
            return;
        }
    }
}
|
|
|
|
/// <summary>
/// SCL handler for the 'cmd' command: runs the command through <c>cmd.exe /c</c> with
/// redirected output and reports the result.
/// </summary>
/// <param name="args">Command fragments from the SCL parser; they are re-joined with '|'.</param>
/// <returns>
/// An error result when no command was supplied, the command timed out, exited with a
/// non-zero code or raised an exception; a no-response result when it produced no
/// standard output; otherwise a success result carrying the trimmed standard output.
/// </returns>
private async Task<SclResult> HandleCmdCommand(string[] args)
{
    const int commandTimeoutMs = 15000;
    // Grace period for collecting output once cmd.exe itself has exited.
    const int outputDrainTimeoutMs = 2000;

    if (args == null || args.Length == 0)
    {
        return SclResult.Error("No command string provided.");
    }

    // Reconstruct the command string in case the AI used an unescaped, unquoted pipe '|'
    // which caused the parser to split the command into multiple arguments.
    string commandString = string.Join("|", args);

    // NOTE(review): the command text originates from the AI reply and is executed
    // verbatim by the shell; this is a deliberate capability, not sanitized input.
    LogToUI($"[SCL Engine] Executing shell command: {commandString}", Color.Magenta);

    try
    {
        var psi = new ProcessStartInfo
        {
            FileName = "cmd.exe",
            Arguments = $"/c {commandString}",
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
            CreateNoWindow = true
        };

        using (var process = new Process { StartInfo = psi })
        {
            process.Start();

            // Start both reads before waiting so a full pipe cannot block the child.
            Task<string> readOutTask = process.StandardOutput.ReadToEndAsync();
            Task<string> readErrTask = process.StandardError.ReadToEndAsync();

            if (!process.WaitForExit(commandTimeoutMs))
            {
                try
                {
                    process.Kill();
                }
                catch (InvalidOperationException)
                {
                    // The process exited between the timeout and the kill request;
                    // there is nothing left to terminate.
                }

                return SclResult.Error("Command timed out after 15 seconds.");
            }

            // A program launched with 'start' can keep the redirected pipes open after
            // cmd.exe exits, in which case the reads would not finish until that program
            // closes. Wait only briefly and continue with whatever has completed.
            Task<string[]> drainTask = Task.WhenAll(readOutTask, readErrTask);
            Task finished = await Task.WhenAny(drainTask, Task.Delay(outputDrainTimeoutMs));
            if (finished == drainTask)
            {
                await drainTask; // surfaces a read failure to the catch block below
            }

            string output = readOutTask.Status == TaskStatus.RanToCompletion ? readOutTask.Result : string.Empty;
            string error = readErrTask.Status == TaskStatus.RanToCompletion ? readErrTask.Result : string.Empty;

            if (process.ExitCode != 0)
            {
                return SclResult.Error($"Exit Code {process.ExitCode}. Error: {error.Trim()}");
            }

            if (string.IsNullOrWhiteSpace(output))
            {
                return SclResult.NoResponse();
            }

            return SclResult.Success(output.Trim());
        }
    }
    catch (Exception ex)
    {
        return SclResult.Error($"Exception executing command: {ex.Message}");
    }
}
|
|
|
|
// --- THREAD-SAFE UI HELPERS ---
|
|
|
|
/// <summary>
/// Records whether a request is running and enables or disables the controls to match.
/// Re-dispatches itself through <c>Invoke</c> when <c>InvokeRequired</c> is true.
/// </summary>
/// <param name="isProcessing">True to lock the UI, false to unlock it.</param>
private void SetUiState(bool isProcessing)
{
    if (!InvokeRequired)
    {
        _isProcessing = isProcessing;

        bool idle = !isProcessing;

        // Disable session switching controls while processing
        cmbSessions.Enabled = idle;
        btnRefreshSessions.Enabled = idle;
        reInitSessionBtn.Enabled = idle;

        // Manual input is only available in text mode and only while idle.
        bool manualInputAllowed = !chkVoiceMode.Checked && idle;
        txtManualInput.Enabled = manualInputAllowed;
        btnSend.Enabled = manualInputAllowed;
        return;
    }

    Invoke(new Action(() => SetUiState(isProcessing)));
}
|
|
|
|
/// <summary>
/// Appends a colored line to the log box and scrolls to it.
/// Re-dispatches itself through <c>Invoke</c> when <c>InvokeRequired</c> is true.
/// </summary>
/// <param name="message">Text to append; a newline is added after it.</param>
/// <param name="color">Color applied to the appended text.</param>
private void LogToUI(string message, Color color)
{
    if (!InvokeRequired)
    {
        // Move the caret to the end so the color applies only to the new text.
        rtbLogs.SelectionStart = rtbLogs.TextLength;
        rtbLogs.SelectionLength = 0;
        rtbLogs.SelectionColor = color;

        string line = message + Environment.NewLine;
        rtbLogs.AppendText(line);

        // Restore the default color for anything typed or appended afterwards.
        rtbLogs.SelectionColor = rtbLogs.ForeColor;
        rtbLogs.ScrollToCaret();
        return;
    }

    Invoke(new Action(() => LogToUI(message, color)));
}
|
|
|
|
/// <summary>
/// Sets the live transcript label text.
/// Re-dispatches itself through <c>Invoke</c> when <c>InvokeRequired</c> is true.
/// </summary>
/// <param name="text">Text to display; an empty string clears the label.</param>
private void UpdateLiveTranscript(string text)
{
    if (!InvokeRequired)
    {
        lblLiveTranscript.Text = text;
        return;
    }

    Invoke(new Action(() => UpdateLiveTranscript(text)));
}
|
|
}
|
|
} |