Update default MLX model to use quantized weights for improved performance and adjust setup instructions accordingly

2026-05-23 01:18:25 +00:00 · 2026-05-20 15:28:41 +02:00 · 2026-05-20 15:28:41 +02:00 · d666c68241
commit d666c68241
parent a5ed7b6af7
2 changed files with 3 additions and 2 deletions
--- a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
+++ b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx
@@ -890,7 +890,7 @@ export const OllamaSetupInstructions = ({ sayWeAutoDetect }: { sayWeAutoDetect?:
 export const MlxSetupInstructions = () => {
 	return <div className='prose-p:my-0 prose-ol:list-decimal prose-p:py-0 prose-ol:my-0 prose-ol:py-0 text-void-fg-3 text-sm list-decimal select-text mb-4'>
 		<div><ChatMarkdownRender string={`MLX (one model at a time)`} chatMessageLocation={undefined} /></div>
-		<div className='pl-6'><ChatMarkdownRender string={`1. Void can run \`pip install mlx-lm\` and start \`mlx_lm.server\` for you (toggle in Settings → Models). Default model: \`mlx-community/Qwen2.5-Coder-1.5B-Instruct\` on port 8080.`} chatMessageLocation={undefined} /></div>
+		<div className='pl-6'><ChatMarkdownRender string={`1. Void can run \`pip install mlx-lm\` and start \`mlx_lm.server\` for you (toggle in Settings → Models). Default model: \`mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit\` on port 8080.`} chatMessageLocation={undefined} /></div>
 		<div className='pl-6'><ChatMarkdownRender string={`2. Void shows only **one** autodetected entry: the model currently loaded by the server.`} chatMessageLocation={undefined} /></div>
 		<div className='pl-6'><ChatMarkdownRender string={`3. **Switch models**: stop the server, run \`mlx_lm.server --model <other-hf-repo>\`, then click **Refresh** next to MLX in Settings → Models.`} chatMessageLocation={undefined} /></div>
 		<div className='pl-6'><ChatMarkdownRender string={`4. **Second model in parallel**: run another server on a different port (e.g. \`mlx_lm.server --model … --port 8081\`), then at the bottom of the model list use **Add Model** → **OpenAI-Compatible** with \`http://127.0.0.1:8081/v1\`, or add the model id manually under MLX if the endpoint exposes multiple ids.`} chatMessageLocation={undefined} /></div>
--- a/src/vs/workbench/contrib/void/common/mlxTypes.ts
+++ b/src/vs/workbench/contrib/void/common/mlxTypes.ts
@@ -8,7 +8,8 @@ import { createDecorator } from '../../../../platform/instantiation/common/insta
 export const MLX_DEFAULT_ENDPOINT = 'http://127.0.0.1:8080';
 export const MLX_DEFAULT_PORT = 8080;
 /** Small default model; first start may download weights from Hugging Face. */
-export const MLX_DEFAULT_MODEL = 'mlx-community/Qwen2.5-Coder-1.5B-Instruct';
+/** Quantized MLX weights on Hugging Face; the unsuffixed repo id does not exist. */
+export const MLX_DEFAULT_MODEL = 'mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit';

 export type MlxEnsureAction = 'already-running' | 'started' | 'installed-and-started';