Update default MLX model to use quantized weights for improved performance and adjust setup instructions accordingly

This commit is contained in:
Jérôme Commaret 2026-05-20 15:28:41 +02:00
parent a5ed7b6af7
commit d666c68241
2 changed files with 3 additions and 2 deletions

View file

@ -890,7 +890,7 @@ export const OllamaSetupInstructions = ({ sayWeAutoDetect }: { sayWeAutoDetect?:
export const MlxSetupInstructions = () => {
return <div className='prose-p:my-0 prose-ol:list-decimal prose-p:py-0 prose-ol:my-0 prose-ol:py-0 text-void-fg-3 text-sm list-decimal select-text mb-4'>
<div><ChatMarkdownRender string={`MLX (one model at a time)`} chatMessageLocation={undefined} /></div>
<div className='pl-6'><ChatMarkdownRender string={`1. Void can run \`pip install mlx-lm\` and start \`mlx_lm.server\` for you (toggle in Settings → Models). Default model: \`mlx-community/Qwen2.5-Coder-1.5B-Instruct\` on port 8080.`} chatMessageLocation={undefined} /></div>
<div className='pl-6'><ChatMarkdownRender string={`1. Void can run \`pip install mlx-lm\` and start \`mlx_lm.server\` for you (toggle in Settings → Models). Default model: \`mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit\` on port 8080.`} chatMessageLocation={undefined} /></div>
<div className='pl-6'><ChatMarkdownRender string={`2. Void shows only **one** autodetected entry: the model currently loaded by the server.`} chatMessageLocation={undefined} /></div>
<div className='pl-6'><ChatMarkdownRender string={`3. **Switch models**: stop the server, run \`mlx_lm.server --model <other-hf-repo>\`, then click **Refresh** next to MLX in Settings → Models.`} chatMessageLocation={undefined} /></div>
<div className='pl-6'><ChatMarkdownRender string={`4. **Second model in parallel**: run another server on a different port (e.g. \`mlx_lm.server --model … --port 8081\`), then at the bottom of the model list use **Add Model** → **OpenAI-Compatible** with \`http://127.0.0.1:8081/v1\`, or add the model id manually under MLX if the endpoint exposes multiple ids.`} chatMessageLocation={undefined} /></div>

View file

@ -8,7 +8,8 @@ import { createDecorator } from '../../../../platform/instantiation/common/insta
/** Default base URL for the MLX server: loopback address 127.0.0.1, port 8080. */
export const MLX_DEFAULT_ENDPOINT = 'http://127.0.0.1:8080';
/** Default port number; the same 8080 that is embedded in MLX_DEFAULT_ENDPOINT. */
export const MLX_DEFAULT_PORT = 8080;
/** Small default model; first start may download weights from Hugging Face. */
export const MLX_DEFAULT_MODEL = 'mlx-community/Qwen2.5-Coder-1.5B-Instruct';
/** Quantized MLX weights on Hugging Face; the unsuffixed repo id does not exist. */
export const MLX_DEFAULT_MODEL = 'mlx-community/Qwen2.5-Coder-1.5B-Instruct-4bit';
/**
 * Closed set of three string literals naming the outcome of an MLX "ensure" step.
 * NOTE(review): judging by the names only, these presumably distinguish a server that was
 * already up, one that had to be started, and one that needed an install before starting —
 * confirm against the code that produces these values.
 */
export type MlxEnsureAction = 'already-running' | 'started' | 'installed-and-started';