thinking slider backend support, QwQ

This commit is contained in:
Andrew Pareles 2025-03-07 00:28:54 -08:00
parent bc1a9d692e
commit ae15fd1f38
12 changed files with 228 additions and 170 deletions

View file

@ -401,7 +401,7 @@ class ChatThreadService extends Disposable implements IChatThreadService {
messagesType: 'chatMessages',
useProviderFor: 'Ctrl+L',
logging: { loggingName: `Agent` },
messages: messages,
messages,
tools: tools,

View file

@ -1187,14 +1187,19 @@ class EditCodeService extends Disposable implements IEditCodeService {
/**
 * Starts applying an edit described by `opts` and reports where it is being applied.
 *
 * Dispatches on `opts.type`:
 *  - `'rewrite'`       -> `_initializeWriteoverStream`
 *  - `'searchReplace'` -> `_initializeSearchAndReplaceStream`
 *
 * @returns `[uri, applyPromise]`, where `uri` is the URI of the diff zone that was created and
 * `applyPromise` settles when the apply is done (it is rejected if the edit is undone while it
 * is still streaming). Returns `null` when the initializer did not create a diff zone, or when
 * `opts.type` is not one of the handled values.
 *
 * NOTE(review): the previous comment on this method said it "throws if there's an error";
 * presumably the initializers can still throw synchronously - confirm against their bodies.
 */
public startApplying(opts: StartApplyingOpts): [URI, Promise<void>] | null {
	if (opts.type === 'rewrite') {
		const added = this._initializeWriteoverStream(opts)
		if (!added) return null
		const [diffZone, promise] = added
		return [diffZone._URI, promise]
	}
	else if (opts.type === 'searchReplace') {
		const added = this._initializeSearchAndReplaceStream(opts)
		if (!added) return null // a single guard is enough; it was duplicated before
		const [diffZone, promise] = added
		return [diffZone._URI, promise]
	}
	return null
}
@ -1221,9 +1226,9 @@ class EditCodeService extends Disposable implements IEditCodeService {
private _initializeWriteoverStream(opts: StartApplyingOpts): DiffZone | undefined {
private _initializeWriteoverStream(opts: StartApplyingOpts): [DiffZone, Promise<void>] | undefined {
const { from, onFinalMessage: onFinalMessage_, onError: onError_, } = opts
const { from, } = opts
let startLine: number
let endLine: number
@ -1267,9 +1272,15 @@ class EditCodeService extends Disposable implements IEditCodeService {
let streamRequestIdRef: { current: string | null } = { current: null }
// promise that resolves when the apply is done
let resApplyPromise: () => void
let rejApplyPromise: (e: any) => void
const applyPromise = new Promise<void>((res_, rej_) => { resApplyPromise = res_; rejApplyPromise = rej_ })
// add to history
const { onFinishEdit } = this._addToHistory(uri, {
onUndo: () => { onError_?.({ message: 'Edit was interrupted by pressing undo.', fullError: null }) }
onUndo: () => { if (diffZone._streamState.isStreaming) rejApplyPromise(new Error('Edit was interrupted by pressing undo.')) }
})
// __TODO__ let users customize modelFimTags
@ -1366,56 +1377,64 @@ class EditCodeService extends Disposable implements IEditCodeService {
let fullTextSoFar = '' // so far (INCLUDING ignored suffix)
let prevIgnoredSuffix = ''
streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({
messagesType: 'chatMessages',
useProviderFor: opts.from === 'ClickApply' ? 'Apply' : 'Ctrl+K',
logging: { loggingName: `startApplying - ${from}` },
messages,
onText: (params) => {
const { fullText: fullText_ } = params
const newText_ = fullText_.substring(fullTextSoFar.length, Infinity)
const writeover = async () => {
const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix!
fullTextSoFar += newText // full text, including ```, etc
let resMessageDonePromise: () => void = () => { }
const messageDonePromise = new Promise<void>((res_) => { resMessageDonePromise = res_ })
const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length)
const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamInfoMutable)
diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file
streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({
messagesType: 'chatMessages',
useProviderFor: opts.from === 'ClickApply' ? 'Apply' : 'Ctrl+K',
logging: { loggingName: `Edit (Writeover) - ${from}` },
messages,
onText: (params) => {
const { fullText: fullText_ } = params
const newText_ = fullText_.substring(fullTextSoFar.length, Infinity)
this._refreshStylesAndDiffsInURI(uri)
const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix!
fullTextSoFar += newText // full text, including ```, etc
prevIgnoredSuffix = croppedSuffix
},
onFinalMessage: (params) => {
const { fullText } = params
// console.log('DONE! FULL TEXT\n', extractText(fullText), diffZone.startLine, diffZone.endLine)
// at the end, re-write whole thing to make sure no sync errors
const [croppedText, _1, _2] = extractText(fullText, 0)
this._writeText(uri, croppedText,
{ startLineNumber: diffZone.startLine, startColumn: 1, endLineNumber: diffZone.endLine, endColumn: Number.MAX_SAFE_INTEGER }, // 1-indexed
{ shouldRealignDiffAreas: true }
)
onDone()
onFinalMessage_?.()
},
onError: (e) => {
this._notifyError(e)
onDone()
this._undoHistory(uri)
onError_?.(e)
},
const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length)
const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamInfoMutable)
diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file
})
this._refreshStylesAndDiffsInURI(uri)
return diffZone
prevIgnoredSuffix = croppedSuffix
},
onFinalMessage: (params) => {
const { fullText } = params
// console.log('DONE! FULL TEXT\n', extractText(fullText), diffZone.startLine, diffZone.endLine)
// at the end, re-write whole thing to make sure no sync errors
const [croppedText, _1, _2] = extractText(fullText, 0)
this._writeText(uri, croppedText,
{ startLineNumber: diffZone.startLine, startColumn: 1, endLineNumber: diffZone.endLine, endColumn: Number.MAX_SAFE_INTEGER }, // 1-indexed
{ shouldRealignDiffAreas: true }
)
onDone()
resMessageDonePromise()
},
onError: (e) => {
this._notifyError(e)
onDone()
this._undoHistory(uri)
resMessageDonePromise()
},
})
await messageDonePromise
}
writeover().then(() => resApplyPromise()).catch((e) => rejApplyPromise(e))
return [diffZone, applyPromise]
}
private _initializeSearchAndReplaceStream(opts: StartApplyingOpts & { from: 'ClickApply' }) {
const { applyStr, uri: givenURI, onFinalMessage: onFinalMessage_, onError: onError_, } = opts
private _initializeSearchAndReplaceStream(opts: StartApplyingOpts & { from: 'ClickApply' }): [DiffZone, Promise<void>] | undefined {
const { from, applyStr, uri: givenURI, } = opts
let uri: URI
if (givenURI === 'current') {
@ -1450,11 +1469,18 @@ class EditCodeService extends Disposable implements IEditCodeService {
// can use this as a proxy to set the diffArea's stream state requestId
let streamRequestIdRef: { current: string | null } = { current: null }
// promise that resolves when the apply is done
let resApplyPromise: () => void
let rejApplyPromise: (e: any) => void
const applyPromise = new Promise<void>((res_, rej_) => { resApplyPromise = res_; rejApplyPromise = rej_ })
// add to history
const { onFinishEdit } = this._addToHistory(uri, {
onUndo: () => { onError_?.({ message: 'Edit was interrupted by pressing undo.', fullError: null }) }
onUndo: () => { if (diffZone._streamState.isStreaming) rejApplyPromise(new Error('Edit was interrupted by pressing undo.')) }
})
// TODO replace these with whatever block we're on initially if already started
// TODO replace these with whatever block we're on initially if already started (caching apply)
type SearchReplaceDiffAreaMetadata = {
originalBounds: [number, number], // 1-indexed
@ -1542,13 +1568,13 @@ class EditCodeService extends Disposable implements IEditCodeService {
shouldSendAnotherMessage = false
nMessagesSent += 1
let res: () => void = () => { }
const awaitable = new Promise<void>((res_) => { res = res_ })
let resMessageDonePromise: () => void = () => { }
const messageDonePromise = new Promise<void>((res_) => { resMessageDonePromise = res_ })
streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({
messagesType: 'chatMessages',
useProviderFor: 'Apply',
logging: { loggingName: `generateSearchAndReplace` },
logging: { loggingName: `Edit (Search/Replace) - ${from}` },
messages,
onText: (params) => {
const { fullText } = params
@ -1585,11 +1611,12 @@ class EditCodeService extends Disposable implements IEditCodeService {
// if error
if (typeof originalBounds === 'string') {
const content = errMsgOfInvalidStr(originalBounds, block.orig)
console.log('Content', content)
messages.push(
{ role: 'assistant', content: fullText }, // latest output
{ role: 'user', content: content } // user explanation of what's wrong
)
console.log('RETRYING!!!!!!!!!!', content, JSON.stringify(messages, null, 2))
if (streamRequestIdRef.current) this._llmMessageService.abort(streamRequestIdRef.current)
// REVERT
@ -1610,10 +1637,11 @@ class EditCodeService extends Disposable implements IEditCodeService {
shouldUpdateOrigStreamStyle = true
oldBlocks = []
// `slice` only returns a copy and leaves the array untouched; `splice` removes the elements in place
addedTrackingZoneOfBlockNum.splice(0, Infinity) // clear the array
console.log('SHOULD BE EMPTY', addedTrackingZoneOfBlockNum)
shouldSendAnotherMessage = true
this._refreshStylesAndDiffsInURI(uri)
res()
resMessageDonePromise()
return
}
@ -1706,30 +1734,26 @@ class EditCodeService extends Disposable implements IEditCodeService {
}
onDone()
onFinalMessage_?.()
res()
resMessageDonePromise()
},
onError: (e) => {
this._notifyError(e)
onDone()
this._undoHistory(uri)
onError_?.(e)
res()
resMessageDonePromise()
},
})
await awaitable
await messageDonePromise
} // end while
} // end retryLoop
retryLoop()
retryLoop().then(() => resApplyPromise()).catch((e) => rejApplyPromise(e))
return diffZone
return [diffZone, applyPromise]
}

View file

@ -7,7 +7,6 @@ import { Event } from '../../../../base/common/event.js';
import { URI } from '../../../../base/common/uri.js';
import { ICodeEditor } from '../../../../editor/browser/editorBrowser.js';
import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
import { OnError } from '../common/llmMessageTypes.js';
@ -21,9 +20,6 @@ export type StartApplyingOpts = ({
type: 'searchReplace' | 'rewrite';
applyStr: string;
uri: 'current' | URI;
}) & ({
onFinalMessage?: () => void;
onError?: OnError;
})
@ -41,7 +37,7 @@ export const IEditCodeService = createDecorator<IEditCodeService>('editCodeServi
export interface IEditCodeService {
readonly _serviceBrand: undefined;
startApplying(opts: StartApplyingOpts): URI | null;
startApplying(opts: StartApplyingOpts): [URI, Promise<void>] | null;
addCtrlKZone(opts: AddCtrlKOpts): number | undefined;
removeCtrlKZone(opts: { diffareaid: number }): void;

View file

@ -79,12 +79,12 @@ export const ApplyBlockHoverButtons = ({ codeStr, applyBoxId }: { codeStr: strin
const onSubmit = useCallback(() => {
if (isDisabled) return
if (streamState() === 'streaming') return
const newApplyingUri = editCodeService.startApplying({
const [newApplyingUri, _] = editCodeService.startApplying({
from: 'ClickApply',
type: 'searchReplace',
applyStr: codeStr,
uri: 'current',
})
}) ?? []
applyingURIOfApplyBoxIdRef.current[applyBoxId] = newApplyingUri ?? undefined
rerender(c => c + 1)
metricsService.capture('Apply Code', { length: codeStr.length }) // capture the length only

View file

@ -29,7 +29,7 @@ import { WarningBox } from '../void-settings-tsx/WarningBox.js';
import { ChatMessage, StagingSelectionItem, ToolMessage } from '../../../chatThreadService.js';
import { filenameToVscodeLanguage } from '../../../../common/helpers/detectLanguage.js';
import { ToolName } from '../../../toolsService.js';
import { getModelCapabilities } from '../../../../common/modelCapabilities.js';
import { getModelSelectionState, getModelCapabilities } from '../../../../common/modelCapabilities.js';
@ -158,7 +158,7 @@ const getChatBubbleId = (threadId: string, messageIdx: number) => `${threadId}-$
// if (!modelSelection) return null
// const { modelName, providerName } = modelSelection
// const { canToggleReasoning, reasoningBudgetOptions } = getModelCapabilities(providerName, modelName).supportsReasoningOutput || {}
// const { canToggleReasoning, reasoningBudgetSlider } = getModelCapabilities(providerName, modelName).supportsReasoningOutput || {}
// const defaultEnabledVal = canToggleReasoning ? true : false
// const isEnabled = voidSettingsState.optionsOfModelSelection[modelSelection.providerName]?.[modelSelection.modelName]?.reasoningEnabled ?? defaultEnabledVal
@ -176,8 +176,8 @@ const getChatBubbleId = (threadId: string, messageIdx: number) => `${threadId}-$
// }
// let slider: React.ReactNode = null
// if (isEnabled && reasoningBudgetOptions?.type === 'slider') {
// const { min, max, default: defaultVal } = reasoningBudgetOptions
// if (isEnabled && reasoningBudgetSlider?.type === 'slider') {
// const { min, max, default: defaultVal } = reasoningBudgetSlider
// const value = voidSettingsState.optionsOfModelSelection[modelSelection.providerName]?.[modelSelection.modelName]?.reasoningBudget ?? defaultVal
// slider = <div className='flex items-center gap-x-3'>
// <span className='text-void-fg-3 text-xs pointer-events-none inline-block w-10'>Budget</span>
@ -214,24 +214,24 @@ const ReasoningOptionDropdown = () => {
if (!modelSelection) return null
const { modelName, providerName } = modelSelection
const { canToggleReasoning, reasoningBudgetOptions } = getModelCapabilities(providerName, modelName).supportsReasoningOutput || {}
const { canToggleReasoning, reasoningBudgetSlider } = getModelCapabilities(providerName, modelName).supportsReasoning || {}
const defaultEnabledVal = canToggleReasoning ? true : false
const isEnabled = voidSettingsState.optionsOfModelSelection[modelSelection.providerName]?.[modelSelection.modelName]?.reasoningEnabled ?? defaultEnabledVal
const { isReasoningEnabled } = getModelSelectionState(providerName, modelName, voidSettingsState.optionsOfModelSelection)
if (canToggleReasoning && !reasoningBudgetOptions) { // if it's just a on/off toggle without a power slider (no models right now)
return <div className='flex items-center gap-x-2'>
<span className='text-void-fg-3 text-xs pointer-events-none inline-block w-10'>{isEnabled ? 'Thinking' : 'Thinking'}</span>
<VoidSwitch
size='xs'
value={isEnabled}
onChange={(newVal) => { }}
/>
</div>
if (canToggleReasoning && !reasoningBudgetSlider) { // if it's just a on/off toggle without a power slider (no models right now)
return null // unused right now
// return <div className='flex items-center gap-x-2'>
// <span className='text-void-fg-3 text-xs pointer-events-none inline-block w-10'>{isReasoningEnabled ? 'Thinking' : 'Thinking'}</span>
// <VoidSwitch
// size='xs'
// value={isReasoningEnabled}
// onChange={(newVal) => { } }
// />
// </div>
}
if (reasoningBudgetOptions?.type === 'slider') { // if it's a slider
const { min: min_, max, default: defaultVal } = reasoningBudgetOptions
if (reasoningBudgetSlider?.type === 'slider') { // if it's a slider
const { min: min_, max, default: defaultVal } = reasoningBudgetSlider
const value = voidSettingsState.optionsOfModelSelection[modelSelection.providerName]?.[modelSelection.modelName]?.reasoningBudget ?? defaultVal
@ -240,7 +240,7 @@ const ReasoningOptionDropdown = () => {
const min = canToggleReasoning ? min_ - stepSize : min_
return <div className='flex items-center gap-x-2'>
<span className='text-void-fg-3 text-xs pointer-events-none inline-block w-10'>Thinking</span>
<span className='text-void-fg-3 text-xs pointer-events-none inline-block w-10 pr-1'>Thinking</span>
<VoidSlider
width={50}
size='xs'
@ -249,11 +249,12 @@ const ReasoningOptionDropdown = () => {
step={stepSize}
value={value}
onChange={(newVal) => {
console.log('NEWVAL',newVal)
const disabled = newVal === min && canToggleReasoning
voidSettingsService.setOptionsOfModelSelection(modelSelection.providerName, modelSelection.modelName, { reasoningEnabled: !disabled, reasoningBudget: newVal })
}}
/>
<span className='text-void-fg-3 text-xs pointer-events-none'>{isEnabled ? `${value} tokens` : 'Thinking disabled'}</span>
<span className='text-void-fg-3 text-xs pointer-events-none'>{isReasoningEnabled ? `${value} tokens` : 'Thinking disabled'}</span>
</div>
}

View file

@ -459,6 +459,7 @@ export const FeaturesTab = () => {
<div className='w-full'>
<h4 className={`text-base`}>{displayInfoOfFeatureName('Apply')}</h4>
<div className='text-sm italic text-void-fg-3 my-1'>We recommend the smartest model you{`'`}ve got, like Claude 3.7 or Grok 3.</div>
<ModelDropdown featureName={'Apply'} />
</div>
</div>

View file

@ -449,16 +449,13 @@ export class ToolsService implements IToolsService {
},
// Applies `changeDescription` to `uri` as a search/replace edit and waits for the apply to settle.
// If the apply promise rejects (e.g. the edit is undone while streaming), the rejection propagates to the caller.
edit: async ({ uri, changeDescription }) => {
	const applying = editCodeService.startApplying({
		uri,
		applyStr: changeDescription,
		from: 'ClickApply',
		type: 'searchReplace',
	})
	// startApplying returns null when no apply was started; in that case there is nothing to wait for
	if (applying) {
		const [, applyPromise] = applying
		await applyPromise
	}
	return {}
},

View file

@ -116,7 +116,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
this.llmMessageHooks.onError[requestId] = onError
const { aiInstructions } = this.voidSettingsService.state.globalSettings
const { settingsOfProvider } = this.voidSettingsService.state
const { settingsOfProvider, optionsOfModelSelection, } = this.voidSettingsService.state
// params will be stripped of all its functions over the IPC channel
this.channel.call('sendLLMMessage', {
@ -126,6 +126,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
providerName,
modelName,
settingsOfProvider,
optionsOfModelSelection,
} satisfies MainSendLLMMessageParams);
return requestId

View file

@ -5,7 +5,7 @@
import type { ChatMessage } from '../browser/chatThreadService.js'
import type { InternalToolInfo, ToolName } from '../browser/toolsService.js'
import { FeatureName, ProviderName, SettingsOfProvider } from './voidSettingsTypes.js'
import { FeatureName, OptionsOfModelSelection, ProviderName, SettingsOfProvider } from './voidSettingsTypes.js'
export const errorDetails = (fullError: Error | null): string | null => {
@ -113,6 +113,7 @@ export type SendLLMMessageParams = {
providerName: ProviderName;
modelName: string;
settingsOfProvider: SettingsOfProvider;
optionsOfModelSelection: OptionsOfModelSelection;
} & SendLLMType

View file

@ -3,13 +3,13 @@
* Licensed under the Apache License, Version 2.0. See LICENSE.txt for more information.
*--------------------------------------------------------------------------------------*/
import { ProviderName } from './voidSettingsTypes.js';
import { OptionsOfModelSelection, ProviderName } from './voidSettingsTypes.js';
export const defaultModelsOfProvider = {
openAI: [ // https://platform.openai.com/docs/models/gp
'o1',
'o3-mini',
'o1',
'o1-mini',
'gpt-4o',
'gpt-4o-mini',
@ -67,9 +67,9 @@ export const defaultModelsOfProvider = {
type ModelOptions = {
contextWindow: number; // input tokens
maxOutputTokens: number | null; // output tokens
cost: {
contextWindow: number; // input tokens // <-- UNUSED
maxOutputTokens: number | null; // output tokens // <-- UNUSED
cost: { // <-- UNUSED
input: number;
output: number;
cache_read?: number;
@ -79,15 +79,17 @@ type ModelOptions = {
supportsTools: false | 'anthropic-style' | 'openai-style';
supportsFIM: boolean;
supportsReasoningOutput: false | {
supportsReasoning: false | {
// reasoning options if supports reasoning
readonly canToggleReasoning: boolean; // whether or not the user can disable reasoning mode (false if the model only supports reasoning)
readonly canIOReasoning: boolean; // whether or not the model actually outputs reasoning
readonly reasoningMaxOutputTokens?: number; // overrides normal maxOutputTokens // <-- UNUSED (except anthropic)
readonly reasoningBudgetSlider?: { type: 'slider'; min: number; max: number; default: number };
// options related specifically to model output
// you are allowed to not include openSourceThinkTags if it's not open source (no such cases as of writing)
// if it's open source, put the think tags here so we parse them out in e.g. ollama
readonly openSourceThinkTags?: [string, string];
// reasoning options
readonly canToggleReasoning?: boolean; // whether or not the user can enable reasoning mode (or if the model only supports reasoning)
readonly maxOutputTokens?: number;
readonly reasoningBudgetOptions?: { type: 'slider'; min: number; max: number; default: number };
};
}
@ -116,7 +118,7 @@ const modelOptionsDefaults: ModelOptions = {
supportsSystemMessage: false,
supportsTools: false,
supportsFIM: false,
supportsReasoningOutput: false,
supportsReasoning: false,
}
@ -125,70 +127,70 @@ const openSourceModelOptions_assumingOAICompat = {
supportsFIM: false,
supportsSystemMessage: false,
supportsTools: false,
supportsReasoningOutput: { openSourceThinkTags: ['<think>', '</think>'] },
supportsReasoning: { canToggleReasoning: false, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
},
'deepseekCoderV2': {
supportsFIM: false,
supportsSystemMessage: false, // unstable
supportsTools: false,
supportsReasoningOutput: false,
supportsReasoning: false,
},
'codestral': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
// llama
'llama3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'llama3.1': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'llama3.2': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'llama3.3': {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
// qwen
'qwen2.5coder': {
supportsFIM: true,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'qwq': {
supportsFIM: false, // no FIM, yes reasoning
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: { openSourceThinkTags: ['<think>', '</think'] },
supportsReasoning: { canToggleReasoning: false, canIOReasoning: true, openSourceThinkTags: ['<think>', '</think>'] },
},
// FIM only
'starcoder2': {
supportsFIM: true,
supportsSystemMessage: false,
supportsTools: false,
supportsReasoningOutput: false,
supportsReasoning: false,
},
'codegemma:2b': {
supportsFIM: true,
supportsSystemMessage: false,
supportsTools: false,
supportsReasoningOutput: false,
supportsReasoning: false,
},
} as const satisfies { [s: string]: Partial<ModelOptions> }
@ -207,7 +209,7 @@ const extensiveModelFallback: ProviderSettings['modelOptionsFallback'] = (modelN
if (modelName.includes('gpt-4o')) return toFallback(openAIModelOptions['gpt-4o'])
if (modelName.includes('claude-3-5') || modelName.includes('claude-3.5')) return toFallback(anthropicModelOptions['claude-3-5-sonnet-20241022'])
if (modelName.includes('claude')) return toFallback(anthropicModelOptions['claude-3-7-sonnet-20250219'])
if (modelName.includes('grok')) return toFallback(xAIModelOptions['grok-2-latest'])
if (modelName.includes('grok')) return toFallback(xAIModelOptions['grok-2'])
if (modelName.includes('deepseek-r1') || modelName.includes('deepseek-reasoner')) return toFallback({ ...openSourceModelOptions_assumingOAICompat.deepseekR1, contextWindow: 32_000, maxOutputTokens: 4_096, })
if (modelName.includes('deepseek')) return toFallback({ ...openSourceModelOptions_assumingOAICompat.deepseekCoderV2, contextWindow: 32_000, maxOutputTokens: 4_096, })
if (modelName.includes('llama3')) return toFallback({ ...openSourceModelOptions_assumingOAICompat.llama3, contextWindow: 32_000, maxOutputTokens: 4_096, })
@ -231,10 +233,11 @@ const anthropicModelOptions = {
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoningOutput: {
supportsReasoning: {
canToggleReasoning: true,
maxOutputTokens: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningBudgetOptions: { type: 'slider', min: 1024, max: 32_000, default: 1024 }, // they recommend batching if max > 32_000
canIOReasoning: true,
reasoningMaxOutputTokens: 64_000, // can bump it to 128_000 with beta mode output-128k-2025-02-19
reasoningBudgetSlider: { type: 'slider', min: 1024, max: 32_000, default: 1024 }, // they recommend batching if max > 32_000
},
},
'claude-3-5-sonnet-20241022': {
@ -244,7 +247,7 @@ const anthropicModelOptions = {
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'claude-3-5-haiku-20241022': {
contextWindow: 200_000,
@ -253,7 +256,7 @@ const anthropicModelOptions = {
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'claude-3-opus-20240229': {
contextWindow: 200_000,
@ -262,7 +265,7 @@ const anthropicModelOptions = {
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'claude-3-sonnet-20240229': { // no point of using this, but including this for people who put it in
contextWindow: 200_000, cost: { input: 3.00, output: 15.00 },
@ -270,7 +273,7 @@ const anthropicModelOptions = {
supportsFIM: false,
supportsSystemMessage: 'separated',
supportsTools: 'anthropic-style',
supportsReasoningOutput: false,
supportsReasoning: false,
}
} as const satisfies { [s: string]: ModelOptions }
@ -298,7 +301,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: 'developer-role',
supportsReasoningOutput: false,
supportsReasoning: { canIOReasoning: false, canToggleReasoning: false }, // it doesn't actually output reasoning, but our logic is fine with it
},
'o3-mini': {
contextWindow: 200_000,
@ -307,7 +310,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: 'developer-role',
supportsReasoningOutput: false,
supportsReasoning: { canIOReasoning: false, canToggleReasoning: false },
},
'gpt-4o': {
contextWindow: 128_000,
@ -316,7 +319,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'o1-mini': {
contextWindow: 128_000,
@ -325,7 +328,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
supportsFIM: false,
supportsTools: false,
supportsSystemMessage: false, // does not support any system
supportsReasoningOutput: false,
supportsReasoning: { canIOReasoning: false, canToggleReasoning: false },
},
'gpt-4o-mini': {
contextWindow: 128_000,
@ -334,7 +337,7 @@ const openAIModelOptions = { // https://platform.openai.com/docs/pricing
supportsFIM: false,
supportsTools: 'openai-style',
supportsSystemMessage: 'system-role', // ??
supportsReasoningOutput: false,
supportsReasoning: false,
},
} as const satisfies { [s: string]: ModelOptions }
@ -353,14 +356,14 @@ const openAISettings: ProviderSettings = {
// ---------------- XAI ----------------
// Capabilities of the xAI models known to this file, keyed by model name.
// The `'grok-2'` key is what the xAI fallback and the generic `grok` fallback index into.
const xAIModelOptions = {
	'grok-2': {
		contextWindow: 131_072,
		maxOutputTokens: null, // 131_072,
		cost: { input: 2.00, output: 10.00 },
		supportsFIM: false,
		supportsSystemMessage: 'system-role',
		supportsTools: 'openai-style',
		supportsReasoning: false, // no reasoning options for this model
	},
} as const satisfies { [s: string]: ModelOptions }
@ -368,7 +371,7 @@ const xAISettings: ProviderSettings = {
modelOptions: xAIModelOptions,
modelOptionsFallback: (modelName) => {
let fallbackName: keyof typeof xAIModelOptions | null = null
if (modelName.includes('grok-2')) fallbackName = 'grok-2-latest'
if (modelName.includes('grok-2')) fallbackName = 'grok-2'
if (fallbackName) return { modelName: fallbackName, ...xAIModelOptions[fallbackName] }
return null
}
@ -384,7 +387,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style', // we are assuming OpenAI SDK when calling gemini
supportsReasoningOutput: false,
supportsReasoning: false,
},
'gemini-2.0-flash-lite-preview-02-05': {
contextWindow: 1_048_576,
@ -393,7 +396,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'gemini-1.5-flash': {
contextWindow: 1_048_576,
@ -402,7 +405,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'gemini-1.5-pro': {
contextWindow: 2_097_152,
@ -411,7 +414,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'gemini-1.5-flash-8b': {
contextWindow: 1_048_576,
@ -420,7 +423,7 @@ const geminiModelOptions = { // https://ai.google.dev/gemini-api/docs/pricing
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
} as const satisfies { [s: string]: ModelOptions }
@ -466,7 +469,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'llama-3.1-8b-instant': {
contextWindow: 128_000,
@ -475,7 +478,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'qwen-2.5-coder-32b': {
contextWindow: 128_000,
@ -484,7 +487,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
supportsFIM: false, // unfortunately looks like no FIM support on groq
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'qwen-qwq-32b': { // https://huggingface.co/Qwen/QwQ-32B
contextWindow: 128_000,
@ -493,7 +496,7 @@ const groqModelOptions = { // https://console.groq.com/docs/models, https://groq
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: { openSourceThinkTags: ['<think>', '</think>'] }, // we're using reasoning_format:parsed so really don't need to know openSourceThinkTags
supportsReasoning: { canIOReasoning: true, canToggleReasoning: false, openSourceThinkTags: ['<think>', '</think>'] }, // we're using reasoning_format:parsed so really don't need to know openSourceThinkTags
},
} as const satisfies { [s: string]: ModelOptions }
const groqSettings: ProviderSettings = {
@ -540,7 +543,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: {},
supportsReasoning: { canIOReasoning: true, canToggleReasoning: false }, // TODO!!! false for now
},
'anthropic/claude-3.5-sonnet': {
contextWindow: 200_000,
@ -549,7 +552,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
supportsFIM: false,
supportsSystemMessage: 'system-role',
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'mistralai/codestral-2501': {
...openSourceModelOptions_assumingOAICompat.codestral,
@ -557,7 +560,7 @@ const openRouterModelOptions_assumingOpenAICompat = {
maxOutputTokens: null,
cost: { input: 0.3, output: 0.9 },
supportsTools: 'openai-style',
supportsReasoningOutput: false,
supportsReasoning: false,
},
'qwen/qwen-2.5-coder-32b-instruct': {
...openSourceModelOptions_assumingOAICompat['qwen2.5coder'],
@ -614,6 +617,7 @@ const modelSettingsOfProvider: { [providerName in ProviderName]: ProviderSetting
// ---------------- exports ----------------
// returns the capabilities and the adjusted modelName if it was a fallback
export const getModelCapabilities = (providerName: ProviderName, modelName: string): ModelOptions & { modelName: string; isUnrecognizedModel: boolean } => {
const { modelOptions, modelOptionsFallback } = modelSettingsOfProvider[providerName]
if (modelName in modelOptions) return { modelName, ...modelOptions[modelName], isUnrecognizedModel: false }
@ -627,3 +631,13 @@ export const getProviderCapabilities = (providerName: ProviderName) => {
const { providerReasoningIOSettings } = modelSettingsOfProvider[providerName]
return { providerReasoningIOSettings }
}
/**
 * Derives the reasoning-related selection state for a model from `optionsOfModelSelection`.
 *
 * @param providerName provider whose entry in `optionsOfModelSelection` is consulted
 * @param modelName model whose saved options are read; also used to look up the model's capabilities
 * @param optionsOfModelSelection saved options, indexed by provider name and then model name
 * @returns `isReasoningEnabled`: the saved `reasoningEnabled` value, or, when nothing is saved,
 *          `true` only if the model's `supportsReasoning` capabilities report `canToggleReasoning`.
 *          `reasoningBudget`: the saved `reasoningBudget`, or `undefined` when nothing is saved.
 */
export const getModelSelectionState = (providerName: ProviderName, modelName: string, optionsOfModelSelection: OptionsOfModelSelection): { isReasoningEnabled: boolean, reasoningBudget: number | undefined } => {
	// `supportsReasoning` may be falsy, in which case there is nothing to toggle
	const { canToggleReasoning: isToggleable } = getModelCapabilities(providerName, modelName).supportsReasoning || {}

	// the saved options for this exact provider/model pair, if any
	const savedOptions = optionsOfModelSelection[providerName]?.[modelName]

	// with no saved preference, reasoning defaults to on only for models that can toggle it
	const enabledByDefault = !!isToggleable

	return {
		isReasoningEnabled: savedOptions?.reasoningEnabled ?? enabledByDefault,
		reasoningBudget: savedOptions?.reasoningBudget,
	}
}

View file

@ -11,9 +11,9 @@ import { Model as OpenAIModel } from 'openai/resources/models.js';
import { extractReasoningOnFinalMessage, extractReasoningOnTextWrapper } from '../../browser/helpers/extractCodeFromResult.js';
import { LLMChatMessage, LLMFIMMessage, ModelListParams, OllamaModelResponse, OnError, OnFinalMessage, OnText } from '../../common/llmMessageTypes.js';
import { InternalToolInfo, isAToolName, ToolName } from '../../browser/toolsService.js';
import { defaultProviderSettings, displayInfoOfProviderName, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { defaultProviderSettings, displayInfoOfProviderName, OptionsOfModelSelection, ProviderName, SettingsOfProvider } from '../../common/voidSettingsTypes.js';
import { prepareFIMMessage, prepareMessages } from './preprocessLLMMessages.js';
import { getModelCapabilities, getProviderCapabilities } from '../../common/modelCapabilities.js';
import { getModelSelectionState, getModelCapabilities, getProviderCapabilities } from '../../common/modelCapabilities.js';
type InternalCommonMessageParams = {
@ -23,6 +23,7 @@ type InternalCommonMessageParams = {
onError: OnError;
providerName: ProviderName;
settingsOfProvider: SettingsOfProvider;
optionsOfModelSelection: OptionsOfModelSelection;
modelName: string;
_setAborter: (aborter: () => void) => void;
}
@ -153,31 +154,35 @@ const _sendOpenAICompatibleFIM = ({ messages: messages_, onFinalMessage, onError
const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, providerName, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const {
modelName,
supportsReasoningOutput,
supportsReasoning,
supportsSystemMessage,
supportsTools,
// maxOutputTokens, right now we are ignoring this
} = getModelCapabilities(providerName, modelName_)
const {
canIOReasoning,
openSourceThinkTags,
} = supportsReasoning || {}
const { providerReasoningIOSettings } = getProviderCapabilities(providerName)
const { messages } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicContent: false }) // can change supportsAnthropicContent if e.g. OpenRouter starts supporting anthropic extended thinking
const tools = (supportsTools && ((tools_?.length ?? 0) !== 0)) ? tools_?.map(tool => toOpenAICompatibleTool(tool)) : undefined
const includeInPayload = supportsReasoningOutput ? providerReasoningIOSettings?.input?.includeInPayload || {} : {}
const includeInPayload = canIOReasoning ? providerReasoningIOSettings?.input?.includeInPayload || {} : {}
const toolsObj = tools ? { tools: tools, tool_choice: 'auto', parallel_tool_calls: false, } as const : {}
const openai: OpenAI = newOpenAICompatibleSDK({ providerName, settingsOfProvider, includeInPayload })
const options: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelName, messages: messages, stream: true, ...toolsObj, }
const { nameOfFieldInDelta: nameOfReasoningFieldInDelta, needsManualParse: needsManualReasoningParse } = providerReasoningIOSettings?.output ?? {}
const manuallyParseReasoning = needsManualReasoningParse && supportsReasoningOutput && supportsReasoningOutput.openSourceThinkTags
const { needsManualParse: needsManualReasoningParse, nameOfFieldInDelta: nameOfReasoningFieldInDelta } = providerReasoningIOSettings?.output ?? {}
const manuallyParseReasoning = needsManualReasoningParse && canIOReasoning && openSourceThinkTags
if (manuallyParseReasoning) {
onText = extractReasoningOnTextWrapper(onText, supportsReasoningOutput.openSourceThinkTags)
onText = extractReasoningOnTextWrapper(onText, openSourceThinkTags)
}
let fullReasoningSoFar = ''
let fullTextSoFar = ''
const toolCallOfIndex: ToolCallOfIndex = {}
@ -216,7 +221,7 @@ const _sendOpenAICompatibleChat = ({ messages: messages_, onText, onFinalMessage
}
else {
if (manuallyParseReasoning) {
const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, supportsReasoningOutput.openSourceThinkTags)
const { fullText, fullReasoning } = extractReasoningOnFinalMessage(fullTextSoFar, openSourceThinkTags)
onFinalMessage({ fullText, fullReasoning, toolCalls });
} else {
onFinalMessage({ fullText: fullTextSoFar, fullReasoning: fullReasoningSoFar, toolCalls });
@ -283,13 +288,18 @@ const toolCallsFrom_AnthropicContent = (content: Anthropic.Messages.ContentBlock
}).filter(t => !!t)
}
const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalMessage, onError, settingsOfProvider, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalMessage, onError, settingsOfProvider, optionsOfModelSelection, modelName: modelName_, _setAborter, aiInstructions, tools: tools_ }: SendChatParams_Internal) => {
const {
modelName,
supportsSystemMessage,
supportsTools,
maxOutputTokens,
supportsReasoning,
} = getModelCapabilities(providerName, modelName_)
const {
isReasoningEnabled,
reasoningBudget,
} = getModelSelectionState(providerName, modelName_, optionsOfModelSelection) // user's modelName_ here
const { messages, separateSystemMessageStr } = prepareMessages({ messages: messages_, aiInstructions, supportsSystemMessage, supportsTools, supportsAnthropicContent: true })
@ -297,14 +307,26 @@ const sendAnthropicChat = ({ messages: messages_, providerName, onText, onFinalM
const anthropic = new Anthropic({ apiKey: thisConfig.apiKey, dangerouslyAllowBrowser: true });
const tools = ((tools_?.length ?? 0) !== 0) ? tools_?.map(tool => toAnthropicTool(tool)) : undefined
const toolsObj: Partial<Anthropic.Messages.MessageStreamParams> = tools ? {
tools: tools,
tool_choice: { type: 'auto', disable_parallel_tool_use: true } // one tool at a time
} : {}
const enableThinking = supportsReasoning && isReasoningEnabled && reasoningBudget
const maxTokens = enableThinking ? supportsReasoning.reasoningMaxOutputTokens : maxOutputTokens
const thinkingObj: Partial<Anthropic.Messages.MessageStreamParams> = enableThinking ? {
thinking: { type: 'enabled', budget_tokens: reasoningBudget } // thinking enabled
} : {}
const stream = anthropic.messages.stream({
system: separateSystemMessageStr,
messages: messages,
model: modelName,
max_tokens: maxOutputTokens ?? 4_096, // anthropic requires this
tools: tools,
tool_choice: tools ? { type: 'auto', disable_parallel_tool_use: true } : undefined, // one tool use at a time
thinking: { budget_tokens: 2000, type: 'enabled' }, // TODO!!!!
max_tokens: maxTokens ?? 4_096, // anthropic requires this
...toolsObj,
...thinkingObj,
})
// when receive text

View file

@ -19,6 +19,7 @@ export const sendLLMMessage = ({
abortRef: abortRef_,
logging: { loggingName },
settingsOfProvider,
optionsOfModelSelection,
providerName,
modelName,
tools,
@ -104,12 +105,12 @@ export const sendLLMMessage = ({
}
const { sendFIM, sendChat } = implementation
if (messagesType === 'chatMessages') {
sendChat({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, providerName, aiInstructions, tools })
sendChat({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, optionsOfModelSelection, modelName, _setAborter, providerName, aiInstructions, tools })
return
}
if (messagesType === 'FIMMessage') {
if (sendFIM) {
sendFIM({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, modelName, _setAborter, providerName, aiInstructions })
sendFIM({ messages: messages_, onText, onFinalMessage, onError, settingsOfProvider, optionsOfModelSelection, modelName, _setAborter, providerName, aiInstructions })
return
}
onError({ message: `Error: This provider does not support Autocomplete yet.`, fullError: null })