/**
 * Launches a new Cyberdesk desktop and returns its ID.
 *
 * The launch request asks for a 1-hour timeout (`timeout_ms: 3600000`).
 *
 * @returns The `id` field of the launch result.
 * @throws Error when the launch result carries an `error` field. The message
 *   is `'Failed to create desktop: '` followed by the rendered error.
 */
async function createDesktop() {
  console.log('Creating desktop...');

  const launchResult = await cyberdesk.launchDesktop({
    body: { timeout_ms: 3600000 }, // Optional: 1-hour timeout
  });

  if ('error' in launchResult) {
    const failure: unknown = launchResult.error;
    console.error('Failed to create desktop:', failure);

    // String concatenation turns a plain object into "[object Object]", which
    // hides the actual failure. Serialise plain objects; everything else
    // (strings, Error instances, primitives) renders exactly as before.
    const reason =
      typeof failure === 'object' && failure !== null && !(failure instanceof Error)
        ? JSON.stringify(failure)
        : String(failure);
    throw new Error('Failed to create desktop: ' + reason);
  }

  console.log(`Desktop created with ID: ${launchResult.id}`);
  // Note: You might need to wait or use getDesktop to get the streamUrl
  return launchResult.id;
}

// Example usage:
// const desktopId = await createDesktop();
/**
 * Requests termination of the desktop with the given ID.
 *
 * A failed termination is logged but not thrown, so callers can treat
 * shutdown as best-effort.
 *
 * @param desktopId - ID of the desktop to terminate.
 */
async function stopDesktop(desktopId: string) {
  console.log(`Stopping desktop ${desktopId}...`);

  const stopResult = await cyberdesk.terminateDesktop({
    path: { id: desktopId },
  });

  if ('error' in stopResult) {
    console.error('Failed to stop desktop:', stopResult.error);
    // Decide if you need to throw an error or just log.
    // Return here so the success message below is not printed for a failure.
    return;
  }

  console.log('Desktop stop requested.', stopResult);
}

// Example usage:
// await stopDesktop(desktopId);
Use `executeBashAction` to open a browser (e.g., Firefox) in the background, then use `executeComputerAction` with `type: 'wait'` to allow time for it to load.
/**
 * Opens a URL in Firefox on the given desktop, then waits for it to load.
 *
 * Firefox is started in the background via a bash action, after which a
 * `wait` computer action is issued for `waitMs` milliseconds.
 *
 * @param desktopId - ID of the desktop to run the commands on.
 * @param url - URL to open; it is shell-quoted before being placed in the command.
 * @param waitMs - Time to wait after launching the browser, in milliseconds.
 * @throws Error when either the bash action or the wait action returns an `error` field.
 */
async function openBrowserAndWait(desktopId: string, url: string, waitMs: number = 5000) {
  // Plain objects would concatenate as "[object Object]"; serialise them.
  // Strings, Error instances and primitives render exactly as with `+`.
  const renderError = (failure: unknown): string =>
    typeof failure === 'object' && failure !== null && !(failure instanceof Error)
      ? JSON.stringify(failure)
      : String(failure);

  console.log(`Opening ${url} on desktop ${desktopId}...`);

  // Single-quote the URL so the shell passes it to Firefox verbatim. Unquoted,
  // a query string such as `?a=1&b=2` is cut at `&`, and characters like `;`
  // or `$(...)` would run as shell code. Embedded single quotes are written
  // as '\'' (close quote, escaped quote, reopen quote).
  const quotedUrl = `'${url.replace(/'/g, "'\\''")}'`;

  const openResult = await cyberdesk.executeBashAction({
    path: { id: desktopId },
    body: {
      command: `firefox ${quotedUrl} &`, // Run in background
    },
  });

  if ('error' in openResult) {
    console.error('Failed to send open browser command:', openResult.error);
    throw new Error('Failed to send open browser command: ' + renderError(openResult.error));
  }
  console.log('Browser opening command sent.');

  console.log(`Waiting ${waitMs}ms for browser to load...`);
  const waitResult = await cyberdesk.executeComputerAction({
    path: { id: desktopId },
    body: { type: 'wait', ms: waitMs },
  });

  if ('error' in waitResult) {
    console.error('Wait action failed:', waitResult.error);
    throw new Error('Wait action failed: ' + renderError(waitResult.error));
  }
  console.log('Wait complete.');
}

// Example usage:
// await openBrowserAndWait(desktopId, 'https://example.com');
This tutorial demonstrates how to integrate the Cyberdesk SDK with AI models (like Anthropic's Claude) using the Vercel AI SDK to create agents that can use computers.
Create a utility function that maps AI tool parameters to the Cyberdesk SDK's `executeComputerAction` method. This function handles the various action types supported by the AI tool.
// src/utils/computer-use.ts
import client from '@/lib/cyberdeskClient';
import type { ExecuteComputerActionParams } from "cyberdesk"

// Define the action types your AI tool might use
export type ClaudeComputerActionType0124 =
  /* ... (action types like 'left_click', 'type', etc.) ... */
  | "left_click"
  | "right_click"
  // ... (include all action types from the provided code) ...
  | "screenshot";

/**
 * Maps an AI tool action onto a Cyberdesk `executeComputerAction` request,
 * sends it through the SDK client, and returns the result.
 *
 * @param action - The AI tool action to perform.
 * @param desktopId - ID of the desktop the action is sent to.
 * @param coordinate - Pointer position; used for `left_click`.
 * @param text - Text to type; used for `type` (an absent value is sent as `''`).
 * @param duration - Not used by the mappings shown here.
 * @param scroll_amount - Not used by the mappings shown here.
 * @param scroll_direction - Not used by the mappings shown here.
 * @param start_coordinate - Not used by the mappings shown here.
 * @returns `{ type: "image", data }` when the response data contains
 *   `base64_image`; otherwise the response's `output`, or
 *   `'Action completed successfully'` when `output` is empty or missing.
 * @throws Error for an unhandled action or a non-2xx response status. Every
 *   error is logged and then re-thrown to the caller.
 */
export async function executeComputerAction(
  action: ClaudeComputerActionType0124,
  desktopId: string,
  coordinate?: { x: number; y: number },
  text?: string,
  duration?: number,
  scroll_amount?: number,
  scroll_direction?: "left" | "right" | "down" | "up",
  start_coordinate?: { x: number; y: number }
): Promise<string | { type: "image"; data: string }> {
  try {
    let requestBody: ExecuteComputerActionParams['body'];

    // Map the AI tool action to the Cyberdesk SDK's expected format
    switch (action) {
      case 'left_click':
        requestBody = {
          type: 'click_mouse',
          x: coordinate?.x,
          y: coordinate?.y,
          button: 'left',
          click_type: 'click',
          num_of_clicks: 1,
        };
        break;
      // ... Map other actions (right_click, type, scroll, screenshot, etc.) ...
      case 'type':
        requestBody = { type: 'type', text: text ?? '' };
        break;
      case 'screenshot':
        requestBody = { type: 'screenshot' };
        break;
      // ... (Include all case mappings from the provided computer-use.ts code) ...
      default: {
        // Compile-time exhaustiveness guard: this assignment stops
        // type-checking if a member of the action union has no case above.
        const _exhaustiveCheck: never = action;
        throw new Error(`Unhandled action: ${String(_exhaustiveCheck)}`);
      }
    }

    const clientParams: ExecuteComputerActionParams = {
      path: { id: desktopId },
      body: requestBody,
    };

    // *** Use the Cyberdesk SDK client ***
    const result = await client.executeComputerAction(clientParams);

    // Check the raw response status embedded in the SDK result. Any 2xx status
    // is a success; comparing against 200 alone would reject other successful
    // responses such as 201 or 204.
    const { status } = result.response;
    if (status < 200 || status >= 300) {
      let errorDetails = `Failed with status: ${status}`;
      try {
        // Attempt to parse error details from the response body
        const errorBody = await result.response.json();
        errorDetails = errorBody.message || errorBody.error || JSON.stringify(errorBody);
      } catch {
        // Deliberate best-effort: if the body cannot be read or parsed, keep
        // the status-only message built above.
      }
      throw new Error(`Failed to execute computer action ${action}: ${errorDetails}`);
    }

    const data = result.data; // Access the parsed data from the SDK result
    if (data?.base64_image) {
      return { type: "image", data: data.base64_image };
    }
    return data?.output || 'Action completed successfully';
  } catch (error) {
    console.error(`Error executing computer action ${action}:`, error);
    throw error; // Re-throw to be handled by the AI SDK
  }
}
Note: The full mapping logic for all action types is omitted here for brevity, but your implementation should include it as shown in the `computer-use.ts` file.
By wrapping the Cyberdesk SDK methods within utility functions called by your AI tool's execute logic, you can seamlessly integrate robust desktop control into your AI agents. The SDK handles the direct API communication, authentication, and response parsing, simplifying your integration code.