Node.js SDK Complete Guide

Quickly integrate the LLM API using a Node.js SDK with full TypeScript support.

  • TypeScript: full type support
  • Streaming: real-time data streaming
  • Framework integration: Next.js and Express
  • Async concurrency: high-performance processing

1. Install & Configure

Quick install

# Install with npm
npm install @n1n/llm-api

# Install with yarn
yarn add @n1n/llm-api

# Install with pnpm
pnpm add @n1n/llm-api

# Node.js type definitions for TypeScript projects
npm install --save-dev @types/node
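
The examples below read the API key from the LLM_API_KEY environment variable. A minimal setup sketch, assuming the key is kept in a local .env file (a single line reading LLM_API_KEY=your-key-here) and loaded with the dotenv package (npm install dotenv); any other secret store works the same way:

// env.ts (hypothetical file name) -- import this once at application startup
import 'dotenv/config';          // populates process.env from .env

if (!process.env.LLM_API_KEY) {
  throw new Error('LLM_API_KEY is not set');
}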

2. Basic Usage

Getting Started

import { LLMClient } from '@n1n/llm-api';

// Initialize client
const client = new LLMClient({
  apiKey: process.env.LLM_API_KEY,
  baseURL: 'https://api.n1n.ai/v1'
});

// Basic chat
async function chat() {
  try {
    const response = await client.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [
        { role: 'system', content: 'You are a helpful assistant' },
        { role: 'user', content: 'Implement quicksort in JavaScript' }
      ],
      temperature: 0.7,
      max_tokens: 500
    });
    
    console.log(response.choices[0].message.content);
    console.log(`Token usage: ${response.usage.total_tokens}`);
    
  } catch (error) {
    console.error('API call failed:', error);
  }
}

// TypeScript strong typing support
import { ChatCompletionMessage, ChatCompletionResponse } from '@n1n/llm-api';

interface ConversationParams {
  messages: ChatCompletionMessage[];
  model?: string;
  temperature?: number;
}

async function typedChat(params: ConversationParams): Promise<string> {
  const response: ChatCompletionResponse = await client.chat.completions.create({
    model: params.model || 'gpt-3.5-turbo',
    messages: params.messages,
    temperature: params.temperature || 0.7
  });
  
  return response.choices[0].message.content;
}
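
// Example usage of the typed helper (inside an async function):
const answer = await typedChat({
  messages: [{ role: 'user', content: 'Explain the event loop in one paragraph' }],
  temperature: 0.2
});
console.log(answer);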

3. Streaming Responses

Real-time streaming

import { LLMClient } from '@n1n/llm-api';

const client = new LLMClient({ apiKey: process.env.LLM_API_KEY });

// Streaming response handling
async function streamChat() {
  const stream = await client.chat.completions.create({
    model: 'gpt-3.5-turbo',
    messages: [{ role: 'user', content: 'Write a story' }],
    stream: true
  });
  
  // Method 1: for-await loop
  for await (const chunk of stream) {
    const content = chunk.choices[0]?.delta?.content || '';
    process.stdout.write(content);
  }
}

// Method 2: event listeners
async function streamWithEvents() {
  const stream = await client.chat.completions.create({
    model: 'gpt-3.5-turbo',
    messages: [{ role: 'user', content: 'Explain quantum computing' }],
    stream: true
  });
  
  stream.on('content', (content: string) => {
    process.stdout.write(content);
  });
  
  stream.on('error', (error: Error) => {
    console.error('Stream error:', error);
  });
  
  stream.on('end', () => {
    console.log('\nStream ended');
  });
}

// Express SSE streaming response
import express from 'express';

const app = express();

app.get('/stream', async (req, res) => {
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  
  const stream = await client.chat.completions.create({
    model: 'gpt-3.5-turbo',
    messages: [{ role: 'user', content: String(req.query.prompt ?? '') }],
    stream: true
  });
  
  for await (const chunk of stream) {
    const content = chunk.choices[0]?.delta?.content || '';
    res.write(`data: ${JSON.stringify({ content })}\n\n`);
  }
  
  res.write('data: [DONE]\n\n');
  res.end();
});
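
On the browser side, the /stream endpoint above can be consumed with the built-in EventSource API. A minimal client-side sketch (not part of the SDK; the hard-coded prompt is just an example):

const source = new EventSource('/stream?prompt=' + encodeURIComponent('Write a story'));

source.onmessage = (event) => {
  if (event.data === '[DONE]') {
    source.close();                      // server signalled the end of the stream
    return;
  }
  const { content } = JSON.parse(event.data);
  document.body.append(content);         // render tokens as they arrive
};

source.onerror = () => source.close();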

4. Advanced Features

Function calling & batching

// Function Calling
const response = await client.chat.completions.create({
  model: 'gpt-3.5-turbo',
  messages: [{ role: 'user', content: 'What is the weather in Beijing?' }],
  functions: [{
    name: 'get_weather',
    description: 'Get city weather',
    parameters: {
      type: 'object',
      properties: {
        location: { type: 'string', description: 'City name' },
        unit: { type: 'string', enum: ['celsius', 'fahrenheit'] }
      },
      required: ['location']
    }
  }],
  function_call: 'auto'
});

if (response.choices[0].finish_reason === 'function_call') {
  const functionCall = response.choices[0].message.function_call;
  const args = JSON.parse(functionCall.arguments);
  
  // Execute the function
  const weatherData = await getWeather(args.location, args.unit);
  
  // Return the result to the model
  const finalResponse = await client.chat.completions.create({
    model: 'gpt-3.5-turbo',
    messages: [
      { role: 'user', content: 'What is the weather in Beijing?' },
      response.choices[0].message,
      {
        role: 'function',
        name: 'get_weather',
        content: JSON.stringify(weatherData)
      }
    ]
  });
}
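
// Note: getWeather is your own application code, not part of the SDK.
// A hypothetical stub used by this example:
async function getWeather(location: string, unit: 'celsius' | 'fahrenheit' = 'celsius') {
  // Replace with a real weather lookup
  return { location, unit, temperature: 23, condition: 'sunny' };
}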

// Concurrent batch processing
import pLimit from 'p-limit';

const limit = pLimit(5); // Limit concurrency to 5

async function batchProcess(prompts: string[]) {
  const promises = prompts.map(prompt => 
    limit(() => client.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      temperature: 0.3
    }))
  );
  
  const responses = await Promise.all(promises);
  return responses.map(r => r.choices[0].message.content);
}
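
// Usage (inside an async function): at most 5 requests run concurrently
const summaries = await batchProcess([
  'Summarize article A',
  'Summarize article B',
  'Summarize article C'
]);
console.log(summaries);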

// Conversation manager
class ConversationManager {
  private messages: ChatCompletionMessage[] = [];
  private maxHistory = 10;
  
  addSystemMessage(content: string) {
    this.messages.push({ role: 'system', content });
  }
  
  async sendMessage(content: string): Promise<string> {
    this.messages.push({ role: 'user', content });
    
    // Keep the history within the limit
    if (this.messages.length > this.maxHistory) {
      const systemMessages = this.messages.filter(m => m.role === 'system');
      // Trim non-system messages only, so system prompts are not duplicated
      const recentMessages = this.messages
        .filter(m => m.role !== 'system')
        .slice(-(this.maxHistory - systemMessages.length));
      this.messages = [...systemMessages, ...recentMessages];
    }
    
    const response = await client.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: this.messages
    });
    
    const reply = response.choices[0].message;
    this.messages.push(reply);
    
    return reply.content;
  }
}
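
// Usage (inside an async function): history is carried over and trimmed between turns
const conversation = new ConversationManager();
conversation.addSystemMessage('You are a concise technical assistant');

console.log(await conversation.sendMessage('What is a closure in JavaScript?'));
console.log(await conversation.sendMessage('Show a short example of one'));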

5. Next.js Integration

App Router API

// app/api/chat/route.ts
import { LLMClient } from '@n1n/llm-api';
import { NextRequest, NextResponse } from 'next/server';

const client = new LLMClient({
  apiKey: process.env.LLM_API_KEY
});

export async function POST(request: NextRequest) {
  try {
    const { messages } = await request.json();
    
    const response = await client.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages,
      temperature: 0.7
    });
    
    return NextResponse.json({
      content: response.choices[0].message.content,
      usage: response.usage
    });
    
  } catch (error) {
    return NextResponse.json(
      { error: 'API call failed' },
      { status: 500 }
    );
  }
}

// Streaming response API
export async function GET(request: NextRequest) {
  const encoder = new TextEncoder();
  const stream = new TransformStream();
  const writer = stream.writable.getWriter();
  
  const prompt = request.nextUrl.searchParams.get('prompt') || '';
  
  // Process stream asynchronously
  (async () => {
    const llmStream = await client.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      stream: true
    });
    
    for await (const chunk of llmStream) {
      const content = chunk.choices[0]?.delta?.content || '';
      await writer.write(encoder.encode(`data: ${JSON.stringify({ content })}\n\n`));
    }
    
    await writer.write(encoder.encode('data: [DONE]\n\n'));
    await writer.close();
  })();
  
  return new Response(stream.readable, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive'
    }
  });
}
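
On the client side, the POST route above can be called with fetch from any component or browser script. A minimal sketch using a hypothetical askServer helper; the response shape matches what the route handler above returns:

async function askServer(question: string): Promise<string> {
  const res = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: question }]
    })
  });

  if (!res.ok) throw new Error(`Request failed with status ${res.status}`);

  const data = await res.json();
  return data.content;            // { content, usage } as returned by the route above
}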

6. Best Practices

⚡ Performance optimization

  • ✅ Use connection pooling
  • ✅ Implement request caching (see the sketch after this list)
  • ✅ Batch requests
  • ✅ Use Worker Threads
  • ✅ Stream large data
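
A minimal in-memory caching sketch for repeated identical prompts (an illustration only; in production prefer a bounded cache with a TTL, such as the lru-cache package):

// client: the LLMClient instance created in section 2
const responseCache = new Map<string, string>();

async function cachedChat(prompt: string): Promise<string> {
  const cached = responseCache.get(prompt);
  if (cached !== undefined) return cached;      // cache hit: skip the API call

  const response = await client.chat.completions.create({
    model: 'gpt-3.5-turbo',
    messages: [{ role: 'user', content: prompt }],
    temperature: 0                               // deterministic output caches better
  });

  const content = response.choices[0].message.content;
  responseCache.set(prompt, content);
  return content;
}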

🔒 Security practices

  • ✅ Manage keys via environment variables
  • ✅ Rate limit requests (see the middleware sketch after this list)
  • ✅ Validate and sanitize inputs
  • ✅ Use HTTPS for transport encryption
  • ✅ Sanitize error messages
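
A minimal Express sketch combining a hand-rolled per-IP rate limit with basic input validation (a dedicated package such as express-rate-limit plus a schema validator is more robust in production):

import express, { Request, Response, NextFunction } from 'express';

const hits = new Map<string, { count: number; windowStart: number }>();
const WINDOW_MS = 60_000;   // 1-minute window
const MAX_REQUESTS = 30;    // per IP per window

function rateLimit(req: Request, res: Response, next: NextFunction) {
  const ip = req.ip ?? 'unknown';
  const now = Date.now();
  const entry = hits.get(ip);

  if (!entry || now - entry.windowStart > WINDOW_MS) {
    hits.set(ip, { count: 1, windowStart: now });   // start a fresh window for this IP
    return next();
  }
  if (entry.count >= MAX_REQUESTS) {
    return res.status(429).json({ error: 'Too many requests' });
  }
  entry.count += 1;
  next();
}

const app = express();
app.use(express.json());
app.use(rateLimit);

app.post('/chat', (req, res) => {
  const prompt = typeof req.body.prompt === 'string' ? req.body.prompt.trim() : '';
  if (!prompt || prompt.length > 4000) {
    return res.status(400).json({ error: 'Invalid prompt' });   // reject bad input before calling the API
  }
  // Call the LLM client here; never echo internal error details back to the caller
  res.json({ ok: true });
});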