mirror of
https://github.com/wavetermdev/waveterm
synced 2026-04-21 14:37:16 +00:00
Add Google AI file summarization package (#2455)
- [x] Create new directory pkg/aiusechat/google - [x] Implement SummarizeFile function with: - Context parameter for timeout - File validation (images, PDFs, text files only) - Use gemini-2.5-flash-lite model - Configurable API URL and prompt as constants - Return (string, usage, error) - [x] Define Google-specific usage struct - [x] Test the implementation (all tests pass) - [x] Verify with existing linting and build - [x] Run CodeQL security check (no issues found) - [x] Revert unintended tsunami demo dependency changes ## Summary Successfully implemented a new Google AI package at `pkg/aiusechat/google` with: 1. **SummarizeFile function** - A simple request-response API (not streaming, not SSE) - Takes context for timeout - Validates file types (images, PDFs, text only) - Enforces file size limits matching wshcmd-ai.go - Uses gemini-2.5-flash-lite model - Returns (summary string, usage stats, error) 2. **GoogleUsage struct** - Tracks token consumption: - PromptTokenCount - CachedContentTokenCount - CandidatesTokenCount - TotalTokenCount 3. **Configurable constants**: - GoogleAPIURL (for reference) - SummarizePrompt (customizable prompt) - SummarizeModel (gemini-2.5-flash-lite) 4. **Comprehensive tests** - 41.7% coverage with all tests passing 5. **Security verified** - No CodeQL alerts 6. **Package documentation** - doc.go with usage examples Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: sawka <2722291+sawka@users.noreply.github.com>
This commit is contained in:
parent
2619c85d03
commit
0d04b99b46
4 changed files with 558 additions and 0 deletions
104
cmd/testsummarize/main-testsummarize.go
Normal file
104
cmd/testsummarize/main-testsummarize.go
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
// Copyright 2025, Command Line Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/wavetermdev/waveterm/pkg/aiusechat/google"
|
||||
)
|
||||
|
||||
// printUsage writes the command-line help text to standard output: the
// invocation syntax, a few examples, the supported file types, the available
// flags, and the required environment variable.
func printUsage() {
	helpLines := []string{
		"Usage: go run main-testsummarize.go [--help] [--mode MODE] <filename>",
		"Examples:",
		" go run main-testsummarize.go README.md",
		" go run main-testsummarize.go --mode useful /path/to/image.png",
		" go run main-testsummarize.go -m publiccode document.pdf",
		"",
		"Supported file types:",
		" - Text files (up to 200KB)",
		" - Images (up to 7MB)",
		" - PDFs (up to 5MB)",
		"",
		"Flags:",
		" --mode, -m Summarization mode (default: quick)",
		" Options: quick, useful, publiccode, htmlcontent, htmlfull",
		"",
		"Environment variables:",
		" GOOGLE_APIKEY (required)",
	}
	// One Println per entry keeps the output identical to printing each line
	// individually.
	for _, line := range helpLines {
		fmt.Println(line)
	}
}
|
||||
|
||||
func main() {
|
||||
var showHelp bool
|
||||
var mode string
|
||||
flag.BoolVar(&showHelp, "help", false, "Show usage information")
|
||||
flag.StringVar(&mode, "mode", "quick", "Summarization mode")
|
||||
flag.StringVar(&mode, "m", "quick", "Summarization mode (shorthand)")
|
||||
flag.Parse()
|
||||
|
||||
if showHelp {
|
||||
printUsage()
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
apiKey := os.Getenv("GOOGLE_APIKEY")
|
||||
if apiKey == "" {
|
||||
fmt.Println("Error: GOOGLE_APIKEY environment variable not set")
|
||||
printUsage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
args := flag.Args()
|
||||
if len(args) == 0 {
|
||||
fmt.Println("Error: filename required")
|
||||
printUsage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
filename := args[0]
|
||||
|
||||
// Check if file exists
|
||||
if _, err := os.Stat(filename); os.IsNotExist(err) {
|
||||
fmt.Printf("Error: file '%s' does not exist\n", filename)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
fmt.Printf("Summarizing file: %s\n", filename)
|
||||
fmt.Printf("Model: %s\n", google.SummarizeModel)
|
||||
fmt.Printf("Mode: %s\n", mode)
|
||||
|
||||
startTime := time.Now()
|
||||
summary, usage, err := google.SummarizeFile(ctx, filename, google.SummarizeOpts{
|
||||
APIKey: apiKey,
|
||||
Mode: mode,
|
||||
})
|
||||
latency := time.Since(startTime)
|
||||
|
||||
fmt.Printf("Latency: %d ms\n", latency.Milliseconds())
|
||||
fmt.Println("===")
|
||||
if err != nil {
|
||||
fmt.Printf("Error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
fmt.Println("\nSummary:")
|
||||
fmt.Println("---")
|
||||
fmt.Println(summary)
|
||||
fmt.Println("---")
|
||||
|
||||
if usage != nil {
|
||||
fmt.Println("\nUsage Statistics:")
|
||||
fmt.Printf(" Prompt tokens: %d\n", usage.PromptTokenCount)
|
||||
fmt.Printf(" Cached tokens: %d\n", usage.CachedContentTokenCount)
|
||||
fmt.Printf(" Response tokens: %d\n", usage.CandidatesTokenCount)
|
||||
fmt.Printf(" Total tokens: %d\n", usage.TotalTokenCount)
|
||||
}
|
||||
}
|
||||
41
pkg/aiusechat/google/doc.go
Normal file
41
pkg/aiusechat/google/doc.go
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2025, Command Line Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// Package google provides Google Generative AI integration for WaveTerm.
|
||||
//
|
||||
// This package implements file summarization using Google's Gemini models.
|
||||
// Unlike other AI provider implementations in the aiusechat package, this
|
||||
// package does NOT implement full SSE streaming. It uses a simple
|
||||
// request-response API for file summarization.
|
||||
//
|
||||
// # Supported File Types
|
||||
//
|
||||
// The package supports the same file types as defined in wshcmd-ai.go:
|
||||
// - Images (PNG, JPEG, etc.): up to 7MB
|
||||
// - PDFs: up to 5MB
|
||||
// - Text files: up to 200KB (500KB when Mode is ModeHTMLContent or ModeHTMLFull)
|
||||
//
|
||||
// Binary files are rejected unless they are recognized as images or PDFs.
|
||||
//
|
||||
// # Usage
|
||||
//
|
||||
// To summarize a file:
|
||||
//
|
||||
// ctx := context.Background()
|
||||
// summary, usage, err := google.SummarizeFile(ctx, "/path/to/file.txt", google.SummarizeOpts{
|
||||
// APIKey: "YOUR_API_KEY",
|
||||
// Mode: google.ModeQuickSummary,
|
||||
// })
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// fmt.Println("Summary:", summary)
|
||||
// fmt.Printf("Tokens used: %d\n", usage.TotalTokenCount)
|
||||
//
|
||||
// # Configuration
|
||||
//
|
||||
// The summarization behavior can be customized by modifying the constants:
|
||||
// - SummarizeModel: The Gemini model to use (default: "gemini-2.5-flash-lite")
|
||||
// - SummarizePrompt: The fallback prompt, used only when Mode is not one of the Mode* constants (each of those selects its own prompt constant)
|
||||
// - GoogleAPIURL: The base URL for the API (for reference, not currently used by the SDK)
|
||||
package google
|
||||
283
pkg/aiusechat/google/google-summarize.go
Normal file
283
pkg/aiusechat/google/google-summarize.go
Normal file
|
|
@ -0,0 +1,283 @@
|
|||
// Copyright 2025, Command Line Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package google
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/google/generative-ai-go/genai"
|
||||
"github.com/wavetermdev/waveterm/pkg/util/utilfn"
|
||||
"google.golang.org/api/option"
|
||||
)
|
||||
|
||||
const (
|
||||
// GoogleAPIURL is the base URL for the Google Generative AI API.
// It is not referenced by SummarizeFile.
GoogleAPIURL = "https://generativelanguage.googleapis.com"

// SummarizeModel is the name of the model requested from the client for
// file summarization.
SummarizeModel = "gemini-2.5-flash-lite"

// Mode constants. SummarizeOpts.Mode is compared against these values to
// choose the prompt sent with the file.

// ModeQuickSummary selects QuickSummaryPrompt.
ModeQuickSummary = "quick"
// ModeUseful selects UsefulSummaryPrompt.
ModeUseful = "useful"
// ModePublicCode selects PublicCodeSummaryPrompt.
ModePublicCode = "publiccode"
// ModeHTMLContent selects HTMLContentPrompt and raises the text size limit to 500KB.
ModeHTMLContent = "htmlcontent"
// ModeHTMLFull selects HTMLFullPrompt and raises the text size limit to 500KB.
ModeHTMLFull = "htmlfull"

// SummarizePrompt is the fallback prompt, used when the requested mode is
// non-empty but does not match any of the Mode constants above.
SummarizePrompt = "Please provide a concise summary of this file. Include the main topics, key points, and any notable information."
|
||||
|
||||
// QuickSummaryPrompt is the prompt for quick file summaries
|
||||
QuickSummaryPrompt = `Summarize the following file for another AI agent that is deciding which files to read.
|
||||
|
||||
If the content is HTML or web page markup, ignore layout elements such as headers, footers, sidebars, navigation menus, cookie banners, pop-ups, ads, and search boxes.
|
||||
Focus only on the visible main content that describes the page’s subject or purpose.
|
||||
|
||||
Keep the summary extremely concise — one or two sentences at most.
|
||||
Explain what the file appears to be and its main purpose or contents.
|
||||
If it's code, mention the language and what it implements (e.g., a CLI, library, test, or config).
|
||||
Avoid speculation or verbose explanations.
|
||||
Do not include markdown, bullets, or formatting — just a plain text summary.`
|
||||
|
||||
// UsefulSummaryPrompt is the prompt for useful file summaries with more detail
|
||||
UsefulSummaryPrompt = `You are summarizing a single file so that another AI agent can understand its purpose and structure.
|
||||
|
||||
If the content is HTML or web page markup, ignore layout elements such as headers, footers, sidebars, navigation menus, cookie banners, pop-ups, ads, and search boxes.
|
||||
Focus only on the visible main content that describes the page’s subject or purpose.
|
||||
|
||||
Start with a short overview (2–4 sentences) describing the overall purpose of the file.
|
||||
If the file is large (more than about 150 lines) or has multiple major sections or functions,
|
||||
then briefly summarize each major section (1–2 sentences per section) and include an approximate line range in parentheses like "(lines 80–220)".
|
||||
|
||||
Keep section summaries extremely concise — only include the most important parts or entry points.
|
||||
If it's code, mention key functions or classes and what they do.
|
||||
If it's documentation, describe key topics or sections.
|
||||
If it's a data or config file, summarize the structure and purpose of the values.
|
||||
|
||||
Never produce more text than would fit comfortably on one screen (roughly under 200 words total).
|
||||
Plain text only — no lists, no markdown, no JSON.`
|
||||
|
||||
// PublicCodeSummaryPrompt is the prompt for public API summaries
|
||||
PublicCodeSummaryPrompt = `You are summarizing a SINGLE source file to expose its PUBLIC API to another AI client.
|
||||
|
||||
GOAL
|
||||
Produce a compact, header-like listing of all PUBLIC symbols callers would use.
|
||||
|
||||
OUTPUT FORMAT (plain text only; no bullets/markdown/JSON):
|
||||
1) Public data structures required by public functions (types/structs/interfaces/enums/const groups):
|
||||
<native one-line comment> (lines A–B)
|
||||
<exact single-line declaration>
|
||||
|
||||
2) Public functions/methods in order of appearance:
|
||||
<native one-line comment> (lines A–B)
|
||||
<exact single-line signature>
|
||||
|
||||
RULES
|
||||
- PUBLIC means exported/externally visible for the language (Go: capitalized; Java/C#/TS: public; Rust: pub; Python: not underscore-prefixed, etc.).
|
||||
- Include ALL public functions/methods.
|
||||
- Include public data structures ONLY if referenced by any public function OR commonly constructed/consumed by callers.
|
||||
- For multi-line declarations, emit a single-line canonical form by collapsing internal whitespace while preserving tokens and order.
|
||||
- The one-line comment is either a compressed docstring or, if absent, a concise inferred purpose (≤ 20 words).
|
||||
- Include approximate line ranges as "(lines A–B)".
|
||||
- Skip private helpers, tests, examples, and internal-only constants.
|
||||
- Preserve generics/annotations/modifiers as they appear (e.g., type params, async, const, noexcept).
|
||||
- No preface or epilogue text—just the listing.
|
||||
|
||||
EXAMPLE STYLE (illustrative; use the target language's comment syntax):
|
||||
// Configuration for the proxy (lines 10–42)
|
||||
type ProxyConfig struct { ... }
|
||||
|
||||
// Creates and configures a new proxy instance (lines 60–92)
|
||||
func NewProxy(cfg ProxyConfig) (*Proxy, error)
|
||||
|
||||
// Handles a single HTTP request (lines 95–168)
|
||||
func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request)`
|
||||
|
||||
// HTMLContentPrompt is the prompt for converting HTML to content-focused Markdown
|
||||
HTMLContentPrompt = `Convert the following stripped HTML into clean Markdown for READING CONTENT ONLY.
|
||||
|
||||
- Output Markdown ONLY (no explanations, no JSON, no code fences).
|
||||
- Keep document title as a single H1 if present (from <title> or first <h1>).
|
||||
- Preserve headings (h1–h6), paragraphs, strong/emphasis, inline code.
|
||||
- Convert <a> to [text](absolute_url). If href is relative, resolve against BASE_URL: {{BASE_URL}}. Do not output javascript:void links.
|
||||
- Preserve lists (ul/ol, nested), blockquotes, and code blocks (<pre><code>) as fenced code (include language if obvious).
|
||||
- Convert tables to Markdown tables; keep header row; include up to 50 data rows, then append "… (more rows)".
|
||||
- Keep images ONLY if alt text is descriptive; render as . Skip tracking pixels and decorative images.
|
||||
- Discard navigation, site header/footer, sidebars, cookie banners, search bars, newsletter/signup, social share, repetitive link clouds, and legal boilerplate unless they are the ONLY content.
|
||||
- Preserve in-page structure order; do not invent content; do not summarize prose—extract faithfully.
|
||||
- Normalize whitespace, collapse repeated blank lines to one.
|
||||
`
|
||||
|
||||
// HTMLFullPrompt is the prompt for converting HTML to navigation-focused Markdown
|
||||
HTMLFullPrompt = `Convert the following stripped HTML into Markdown optimized for SITE NAVIGATION.
|
||||
|
||||
- Output Markdown ONLY (no explanations, no JSON, no code fences).
|
||||
- Start with a top-level title (from <title> or first <h1>) as H1.
|
||||
- Include primary navigation as a section "## Navigation" with bullet lists of top-level links (use visible link text; dedupe exact duplicates).
|
||||
- Include secondary nav/footer links under "## Footer Links".
|
||||
- Then extract the main page content as Markdown (headings, paragraphs, lists, blockquotes, code blocks).
|
||||
- Convert <a> to [text](absolute_url). If href is relative, resolve against BASE_URL: {{BASE_URL}}.
|
||||
- Convert tables to Markdown tables; keep header + up to 50 rows, then "… (more rows)".
|
||||
- Keep images with meaningful alt as ; otherwise skip.
|
||||
- Preserve order as it appears in the page; do not summarize prose—extract faithfully.
|
||||
- Normalize whitespace; collapse repeated blank lines.`
|
||||
)
|
||||
|
||||
// SummarizeOpts contains options for file summarization.
type SummarizeOpts struct {
	// APIKey is the Google API key handed to the client via option.WithAPIKey.
	APIKey string
	// Mode selects the prompt (see the Mode* constants) and, for the HTML
	// modes, the larger text size limit. SummarizeFile rejects an empty Mode.
	Mode string
}
|
||||
|
||||
// GoogleUsage represents token usage information from Google's Generative AI API.
// SummarizeFile fills each field from the identically named field of the
// response's UsageMetadata.
type GoogleUsage struct {
	// PromptTokenCount is copied from UsageMetadata.PromptTokenCount.
	PromptTokenCount int32 `json:"prompt_token_count"`
	// CachedContentTokenCount is copied from UsageMetadata.CachedContentTokenCount.
	CachedContentTokenCount int32 `json:"cached_content_token_count"`
	// CandidatesTokenCount is copied from UsageMetadata.CandidatesTokenCount.
	CandidatesTokenCount int32 `json:"candidates_token_count"`
	// TotalTokenCount is copied from UsageMetadata.TotalTokenCount.
	TotalTokenCount int32 `json:"total_token_count"`
}
|
||||
|
||||
// detectMimeType sniffs the content type of data with http.DetectContentType
// and returns only the media type, dropping any ";"-separated parameters
// (for example "text/plain; charset=utf-8" becomes "text/plain").
func detectMimeType(data []byte) string {
	sniffed := http.DetectContentType(data)
	if semi := strings.IndexByte(sniffed, ';'); semi >= 0 {
		return sniffed[:semi]
	}
	return sniffed
}
|
||||
|
||||
func getMaxFileSize(mimeType, mode string) (int, string) {
|
||||
if mimeType == "application/pdf" {
|
||||
return 5 * 1024 * 1024, "5MB"
|
||||
}
|
||||
if strings.HasPrefix(mimeType, "image/") {
|
||||
return 7 * 1024 * 1024, "7MB"
|
||||
}
|
||||
if mode == ModeHTMLContent || mode == ModeHTMLFull {
|
||||
return 500 * 1024, "500KB"
|
||||
}
|
||||
return 200 * 1024, "200KB"
|
||||
}
|
||||
|
||||
// SummarizeFile reads a file and generates a summary using Google's Generative AI.
|
||||
// It supports images, PDFs, and text files based on the limits defined in wshcmd-ai.go.
|
||||
// Returns the summary text, usage information, and any error encountered.
|
||||
func SummarizeFile(ctx context.Context, filename string, opts SummarizeOpts) (string, *GoogleUsage, error) {
|
||||
if opts.Mode == "" {
|
||||
return "", nil, fmt.Errorf("mode is required")
|
||||
}
|
||||
|
||||
// Read the file
|
||||
data, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("reading file: %w", err)
|
||||
}
|
||||
|
||||
// Detect MIME type
|
||||
mimeType := detectMimeType(data)
|
||||
|
||||
isPDF := mimeType == "application/pdf"
|
||||
isImage := strings.HasPrefix(mimeType, "image/")
|
||||
|
||||
if !isPDF && !isImage {
|
||||
mimeType = "text/plain"
|
||||
if utilfn.ContainsBinaryData(data) {
|
||||
return "", nil, fmt.Errorf("file contains binary data and cannot be summarized")
|
||||
}
|
||||
}
|
||||
|
||||
// Validate file size
|
||||
maxSize, sizeStr := getMaxFileSize(mimeType, opts.Mode)
|
||||
if len(data) > maxSize {
|
||||
return "", nil, fmt.Errorf("file exceeds maximum size of %s for %s files", sizeStr, mimeType)
|
||||
}
|
||||
|
||||
// Create client
|
||||
client, err := genai.NewClient(ctx, option.WithAPIKey(opts.APIKey))
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("creating Google AI client: %w", err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Create model
|
||||
model := client.GenerativeModel(SummarizeModel)
|
||||
|
||||
// Select prompt based on mode
|
||||
var prompt string
|
||||
switch opts.Mode {
|
||||
case ModeQuickSummary:
|
||||
prompt = QuickSummaryPrompt
|
||||
case ModeUseful:
|
||||
prompt = UsefulSummaryPrompt
|
||||
case ModePublicCode:
|
||||
prompt = PublicCodeSummaryPrompt
|
||||
case ModeHTMLContent:
|
||||
prompt = HTMLContentPrompt
|
||||
case ModeHTMLFull:
|
||||
prompt = HTMLFullPrompt
|
||||
default:
|
||||
prompt = SummarizePrompt
|
||||
}
|
||||
|
||||
// Prepare the content parts
|
||||
var parts []genai.Part
|
||||
|
||||
// Add the prompt
|
||||
parts = append(parts, genai.Text(prompt))
|
||||
|
||||
// Add the file content based on type
|
||||
if isImage {
|
||||
// For images, use Blob
|
||||
parts = append(parts, genai.Blob{
|
||||
MIMEType: mimeType,
|
||||
Data: data,
|
||||
})
|
||||
} else if isPDF {
|
||||
// For PDFs, use Blob
|
||||
parts = append(parts, genai.Blob{
|
||||
MIMEType: mimeType,
|
||||
Data: data,
|
||||
})
|
||||
} else {
|
||||
// For text files, convert to string
|
||||
parts = append(parts, genai.Text(string(data)))
|
||||
}
|
||||
|
||||
// Generate content
|
||||
resp, err := model.GenerateContent(ctx, parts...)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("generating content: %w", err)
|
||||
}
|
||||
|
||||
// Check if we got any candidates
|
||||
if len(resp.Candidates) == 0 {
|
||||
return "", nil, fmt.Errorf("no response candidates returned")
|
||||
}
|
||||
|
||||
// Extract the text from the first candidate
|
||||
candidate := resp.Candidates[0]
|
||||
if candidate.Content == nil || len(candidate.Content.Parts) == 0 {
|
||||
return "", nil, fmt.Errorf("no content in response")
|
||||
}
|
||||
|
||||
var summary strings.Builder
|
||||
for _, part := range candidate.Content.Parts {
|
||||
if textPart, ok := part.(genai.Text); ok {
|
||||
summary.WriteString(string(textPart))
|
||||
}
|
||||
}
|
||||
|
||||
// Convert usage metadata
|
||||
var usage *GoogleUsage
|
||||
if resp.UsageMetadata != nil {
|
||||
usage = &GoogleUsage{
|
||||
PromptTokenCount: resp.UsageMetadata.PromptTokenCount,
|
||||
CachedContentTokenCount: resp.UsageMetadata.CachedContentTokenCount,
|
||||
CandidatesTokenCount: resp.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokenCount: resp.UsageMetadata.TotalTokenCount,
|
||||
}
|
||||
}
|
||||
|
||||
return summary.String(), usage, nil
|
||||
}
|
||||
130
pkg/aiusechat/google/google-summarize_test.go
Normal file
130
pkg/aiusechat/google/google-summarize_test.go
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
// Copyright 2025, Command Line Inc.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package google
|
||||
|
||||
import (
	"context"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"
)
|
||||
|
||||
func TestDetectMimeType(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "plain text",
|
||||
data: []byte("Hello, World!"),
|
||||
expected: "text/plain",
|
||||
},
|
||||
{
|
||||
name: "empty file",
|
||||
data: []byte{},
|
||||
expected: "text/plain",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := detectMimeType(tt.data)
|
||||
if !containsMimeType(result, tt.expected) {
|
||||
t.Errorf("detectMimeType() = %v, want to contain %v", result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// containsMimeType reports whether got matches want, additionally accepting
// the "text/plain; charset=utf-8" form that DetectContentType can produce
// when want is "text/plain".
func containsMimeType(got, want string) bool {
	if got == want {
		return true
	}
	if want != "text/plain" {
		return false
	}
	return got == "text/plain; charset=utf-8"
}
|
||||
|
||||
func TestSummarizeFile_FileNotFound(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, _, err := SummarizeFile(ctx, "/nonexistent/file.txt", SummarizeOpts{
|
||||
APIKey: "fake-api-key",
|
||||
Mode: ModeQuickSummary,
|
||||
})
|
||||
if err == nil {
|
||||
t.Error("SummarizeFile() expected error for nonexistent file, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSummarizeFile_BinaryFile(t *testing.T) {
|
||||
// Create a temporary binary file
|
||||
tmpDir := t.TempDir()
|
||||
binFile := filepath.Join(tmpDir, "test.bin")
|
||||
|
||||
// Create binary data (not text, image, or PDF)
|
||||
binaryData := []byte{0x00, 0x01, 0x02, 0x03, 0x7F, 0x80, 0xFF}
|
||||
if err := os.WriteFile(binFile, binaryData, 0644); err != nil {
|
||||
t.Fatalf("Failed to create test file: %v", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, _, err := SummarizeFile(ctx, binFile, SummarizeOpts{
|
||||
APIKey: "fake-api-key",
|
||||
Mode: ModeQuickSummary,
|
||||
})
|
||||
if err == nil {
|
||||
t.Error("SummarizeFile() expected error for binary file, got nil")
|
||||
}
|
||||
if err != nil && !containsString(err.Error(), "binary data") {
|
||||
t.Errorf("SummarizeFile() error = %v, want error containing 'binary data'", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSummarizeFile_FileTooLarge(t *testing.T) {
|
||||
// Create a temporary text file that exceeds the limit
|
||||
tmpDir := t.TempDir()
|
||||
textFile := filepath.Join(tmpDir, "large.txt")
|
||||
|
||||
// Create a file larger than 200KB (text file limit)
|
||||
largeData := make([]byte, 201*1024)
|
||||
for i := range largeData {
|
||||
largeData[i] = 'a'
|
||||
}
|
||||
if err := os.WriteFile(textFile, largeData, 0644); err != nil {
|
||||
t.Fatalf("Failed to create test file: %v", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
_, _, err := SummarizeFile(ctx, textFile, SummarizeOpts{
|
||||
APIKey: "fake-api-key",
|
||||
Mode: ModeQuickSummary,
|
||||
})
|
||||
if err == nil {
|
||||
t.Error("SummarizeFile() expected error for file too large, got nil")
|
||||
}
|
||||
if err != nil && !containsString(err.Error(), "exceeds maximum size") {
|
||||
t.Errorf("SummarizeFile() error = %v, want error containing 'exceeds maximum size'", err)
|
||||
}
|
||||
}
|
||||
|
||||
// containsString reports whether substr occurs within s. An empty substr is
// contained in every string, including the empty string.
//
// It delegates to strings.Contains, which gives the same result for every
// input as the previous hand-written length checks and scan.
func containsString(s, substr string) bool {
	return strings.Contains(s, substr)
}
|
||||
|
||||
// stringContains reports whether substr occurs anywhere in s by comparing
// every window of len(substr) bytes against substr. An empty substr matches
// any s, and a substr longer than s never matches.
func stringContains(s, substr string) bool {
	width := len(substr)
	// end is the exclusive upper bound of the current window.
	for end := width; end <= len(s); end++ {
		if s[end-width:end] == substr {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Note: We don't test the actual API call without a real API key
|
||||
// Integration tests would require a real key, e.g. from the GOOGLE_APIKEY environment variable that cmd/testsummarize reads
|
||||
Loading…
Reference in a new issue