language detection better

This commit is contained in:
Mathew Pareles 2025-01-11 22:37:31 -08:00
parent ee9f84a475
commit 10f8fcc17d
7 changed files with 216 additions and 89 deletions

View file

@ -0,0 +1,170 @@
/**
 * Maps a language name or alias (eg the tag written on a fenced code block)
 * to the VS Code language id used to render it, eg "bash" -> "shell".
 *
 * Keys are lowercase and are matched exactly; names that are not listed
 * yield `undefined`.
 *
 * The table is built on a null-prototype object so that indexing it with an
 * arbitrary, untrusted name (eg "constructor" or "__proto__") can never
 * return a member inherited from `Object.prototype` instead of a language id.
 */
export const nameToVscodeLanguage: { [key: string]: string } = Object.assign(
	Object.create(null) as { [key: string]: string },
	{
		// Web Technologies
		'html': 'html',
		'css': 'css',
		'scss': 'scss',
		'sass': 'scss',
		'less': 'less',
		// JavaScript flavours are rendered with the more general TypeScript renderer
		'javascript': 'typescript',
		'js': 'typescript',
		'jsx': 'typescript',
		'typescript': 'typescript',
		'ts': 'typescript',
		'tsx': 'typescript',
		'json': 'json',
		'jsonc': 'json',

		// Programming Languages
		'python': 'python',
		'py': 'python',
		'java': 'java',
		'cpp': 'cpp',
		'c++': 'cpp',
		'c': 'c',
		'csharp': 'csharp',
		'cs': 'csharp',
		'c#': 'csharp',
		'go': 'go',
		'golang': 'go',
		'rust': 'rust',
		'rs': 'rust',
		'ruby': 'ruby',
		'rb': 'ruby',
		'php': 'php',
		'shell': 'shell',
		'bash': 'shell',
		'sh': 'shell',
		'zsh': 'shell',

		// Markup and Config
		'markdown': 'markdown',
		'md': 'markdown',
		'xml': 'xml',
		'svg': 'xml',
		'yaml': 'yaml',
		'yml': 'yaml',
		'ini': 'ini',
		'toml': 'ini',

		// Database and Query Languages
		'sql': 'sql',
		'mysql': 'sql',
		'postgresql': 'sql',
		'graphql': 'graphql',
		'gql': 'graphql',

		// Others
		'dockerfile': 'dockerfile',
		'docker': 'dockerfile',
		'makefile': 'makefile',
		'plaintext': 'plaintext',
		'text': 'plaintext',
	},
);
/**
 * Maps a lowercase lookup key to a VS Code language id, eg "ts" -> "typescript".
 *
 * The key is whatever follows the last "." of a file's base name, or the whole
 * base name when it contains no ".". That is why the table holds both real
 * extensions ("py", "yml") and whole file names ("makefile", "gemfile"); dot
 * files such as ".gitignore" are covered by the first rule ("gitignore").
 */
const fileExtensionToVscodeLanguage: { [key: string]: string } = {
	// Web
	'html': 'html',
	'htm': 'html',
	'css': 'css',
	'scss': 'scss',
	'less': 'less',
	'js': 'javascript',
	'jsx': 'javascript',
	'ts': 'typescript',
	'tsx': 'typescript',
	'json': 'json',
	'jsonc': 'json',

	// Programming Languages
	'py': 'python',
	'java': 'java',
	'cpp': 'cpp',
	'cc': 'cpp',
	'c': 'c',
	'h': 'cpp',
	'hpp': 'cpp',
	'cs': 'csharp',
	'go': 'go',
	'rs': 'rust',
	'rb': 'ruby',
	'php': 'php',
	'sh': 'shell',
	'bash': 'shell',
	'zsh': 'shell',

	// Markup/Config
	'md': 'markdown',
	'markdown': 'markdown',
	'xml': 'xml',
	'svg': 'xml',
	'yaml': 'yaml',
	'yml': 'yaml',
	'ini': 'ini',
	'toml': 'ini',

	// Other
	'sql': 'sql',
	'graphql': 'graphql',
	'gql': 'graphql',
	'dockerfile': 'dockerfile',
	'docker': 'dockerfile',
	'mk': 'makefile',

	// Config Files and Dot Files
	'npmrc': 'ini',
	'env': 'ini',
	'gitignore': 'ignore',
	'dockerignore': 'ignore',
	'eslintrc': 'json',
	'babelrc': 'json',
	'prettierrc': 'json',
	'stylelintrc': 'json',
	'editorconfig': 'ini',
	'htaccess': 'apacheconf',
	'conf': 'ini',
	'config': 'ini',

	// Package Files
	'package': 'json',
	'package-lock': 'json',
	'gemfile': 'ruby',
	'podfile': 'ruby',
	'rakefile': 'ruby',

	// Build Systems
	'cmake': 'cmake',
	'makefile': 'makefile',
	'gradle': 'groovy',

	// Shell Scripts
	'bashrc': 'shell',
	'zshrc': 'shell',
	'fish': 'shell',

	// Version Control
	'gitconfig': 'ini',
	'hgrc': 'ini',
	'svnconfig': 'ini',

	// Web Server
	'nginx': 'nginx',

	// Misc Config
	'properties': 'properties',
	'cfg': 'ini',
	'reg': 'ini',
};

/**
 * Detects the VS Code language id for a file from its name.
 *
 * @param filename A bare file name ("math.ts") or a full path using "/" or "\"
 *   separators ("/repo/src/math.ts", "C:\\repo\\Makefile").
 * @returns The language id, or `undefined` when the name is not recognised.
 */
export function filenameToVscodeLanguage(filename: string): string | undefined {
	// Look only at the last path segment. Otherwise a directory prevents
	// whole-name keys from matching ("/repo/Dockerfile" -> "/repo/dockerfile"),
	// and a dot in a directory name is mistaken for the extension separator.
	const basename = filename.split(/[\\/]/).pop() ?? '';

	// Text after the last "." — or the whole base name when there is no ".".
	const key = basename.toLowerCase().split('.').pop();
	if (!key) return undefined;

	// Own-property check: without it a file such as "x.constructor" would
	// return a member inherited from Object.prototype rather than a string.
	return Object.prototype.hasOwnProperty.call(fileExtensionToVscodeLanguage, key)
		? fileExtensionToVscodeLanguage[key]
		: undefined;
}

View file

@ -1078,7 +1078,7 @@ class InlineDiffsService extends Disposable implements IInlineDiffsService {
let messages: LLMMessage[]
if (featureName === 'Ctrl+L') {
const userContent = ctrlLStream_prompt({ originalCode, userMessage })
const userContent = ctrlLStream_prompt({ originalCode, userMessage, uri })
messages = [
// TODO include more context too
{ role: 'system', content: ctrlLStream_systemMessage, },
@ -1087,7 +1087,7 @@ class InlineDiffsService extends Disposable implements IInlineDiffsService {
}
else if (featureName === 'Ctrl+K') {
const { prefix, suffix } = ctrlKStream_prefixAndSuffix({ fullFileStr: currentFileStr, startLine, endLine })
const userContent = ctrlKStream_prompt({ selection: originalCode, userMessage, prefix, suffix, modelWasTrainedOnFIM, fimTags: modelFimTags })
const userContent = ctrlKStream_prompt({ selection: originalCode, userMessage, prefix, suffix, modelWasTrainedOnFIM, fimTags: modelFimTags, uri })
console.log('PREFIX:\n', prefix)
console.log('SUFFIX:\n', suffix)
console.log('USER CONTENT:\n', userContent)

View file

@ -4,6 +4,8 @@
*--------------------------------------------------------------------------------------*/
import { URI } from '../../../../../base/common/uri.js';
import { filenameToVscodeLanguage } from '../helpers/detectLanguage.js';
import { CodeSelection } from '../threadHistoryService.js';
export const chat_systemMessage = `\
@ -22,25 +24,25 @@ Instructions:
FILES
selected file \`math.ts\`:
\`\`\`
\`\`\` typescript
const addNumbers = (a, b) => a + b
const subtractNumbers = (a, b) => a - b
const divideNumbers = (a, b) => a / b
\`\`\`
SELECTION
\`\`\`
\`\`\` typescript
const subtractNumbers = (a, b) => a - b
\`\`\`
INSTRUCTIONS
\`\`\`
\`\`\` typescript
add a function that multiplies numbers below this
\`\`\`
EXPECTED OUTPUT
We can add the following code to the file:
\`\`\`
\`\`\` typescript
// existing code...
const subtractNumbers = (a, b) => a - b;
const multiplyNumbers = (a, b) => a * b;
@ -51,7 +53,7 @@ const multiplyNumbers = (a, b) => a * b;
FILES
selected file \`fib.ts\`:
\`\`\`
\`\`\` typescript
const dfs = (root) => {
if (!root) return;
@ -66,18 +68,18 @@ const fib = (n) => {
\`\`\`
SELECTION
\`\`\`
\`\`\` typescript
return fib(n - 1) + fib(n - 2)
\`\`\`
INSTRUCTIONS
\`\`\`
\`\`\` typescript
memoize results
\`\`\`
EXPECTED OUTPUT
To implement memoization in your Fibonacci function, you can use a JavaScript object to store previously computed results. This will help avoid redundant calculations and improve performance. Here's how you can modify your function:
\`\`\`
\`\`\` typescript
// existing code...
const fib = (n, memo = {}) => {
if (n < 1) return 1;
@ -100,7 +102,7 @@ const stringifySelections = (selections: CodeSelection[]) => {
return selections.map(({ fileURI, content, selectionStr }) =>
`\
File: ${fileURI.fsPath}
\`\`\`
\`\`\` ${filenameToVscodeLanguage(fileURI.fsPath) ?? ''}
${content // this was the enite file which is foolish
}
\`\`\`${selectionStr === null ? '' : `
@ -136,7 +138,7 @@ Directions:
ORIGINAL_FILE
\`Sidebar.tsx\`:
\`\`\`
\`\`\` typescript
import React from 'react';
import styles from './Sidebar.module.css';
@ -172,7 +174,7 @@ export default Sidebar;
\`\`\`
DIFF
\`\`\`
\`\`\` typescript
@@ ... @@
-<div className={styles.sidebar}>
-<ul>
@ -211,7 +213,7 @@ DIFF
\`\`\`
NEW_FILE
\`\`\`
\`\`\` typescript
import React from 'react';
import styles from './Sidebar.module.css';
@ -226,7 +228,7 @@ const Sidebar: React.FC<SidebarProps> = ({ items, onItemSelect, onExtraButtonCli
\`\`\`
COMPLETION
\`\`\`
\`\`\` typescript
<div className={styles.sidebar}>
<ul>
{items.map((item, index) => (
@ -253,10 +255,13 @@ export default Sidebar;\`\`\`
export const ctrlLStream_prompt = ({ originalCode, userMessage }: { originalCode: string, userMessage: string }) => {
export const ctrlLStream_prompt = ({ originalCode, userMessage, uri }: { originalCode: string, userMessage: string, uri: URI }) => {
const language = filenameToVscodeLanguage(uri.fsPath) ?? ''
return `\
ORIGINAL_CODE
\`\`\`
\`\`\` ${language}
${originalCode}
\`\`\`
@ -336,9 +341,11 @@ export const defaultFimTags: FimTagsType = {
midTag: 'SELECTION',
}
export const ctrlKStream_prompt = ({ selection, prefix, suffix, userMessage, modelWasTrainedOnFIM, fimTags }: { selection: string, prefix: string, suffix: string, userMessage: string, modelWasTrainedOnFIM: boolean, fimTags: FimTagsType }) => {
export const ctrlKStream_prompt = ({ selection, prefix, suffix, userMessage, modelWasTrainedOnFIM, fimTags, uri }: { selection: string, prefix: string, suffix: string, userMessage: string, modelWasTrainedOnFIM: boolean, fimTags: FimTagsType, uri: URI }) => {
const { preTag, sufTag, midTag } = fimTags
const language = filenameToVscodeLanguage(uri.fsPath) ?? ''
if (modelWasTrainedOnFIM) {
// const preTag = 'PRE'
// const sufTag = 'SUF'
@ -360,14 +367,14 @@ ${prefix}</${preTag}>
// const midTag = 'SELECTION'
return `\
The user is selecting this code as their SELECTION:
\`\`\`
\`\`\` ${language}
<${midTag}>${selection}</${midTag}>
\`\`\`
The user wants to apply the following INSTRUCTIONS to the SELECTION:
${userMessage}
Please edit the SELECTION following the user's INSTRUCTIONS, and return only a single block.
Please edit the SELECTION following the user's INSTRUCTIONS, and return the edited selection.
Note that the SELECTION has code that comes before it. This code is indicated with <${preTag}>...before<${preTag}/>.
Note also that the SELECTION has code that comes after it. This code is indicated with <${sufTag}>...after<${sufTag}/>.
@ -378,11 +385,11 @@ Instructions:
3. Make sure all brackets in the new selection are balanced the same as in the original selection.
4. Be careful not to duplicate or remove variables, comments, or other syntax by mistake.
Given code:
Given the code:
<${preTag}>${prefix}</${preTag}>
<${sufTag}>${suffix}</${sufTag}>
Return one block of code of the form \`\`\`<${midTag}>...new_selection<${midTag}/>\`\`\`:`
Return only the completion block of code (of the form \`\`\` ${language}\n <${midTag}>...new_selection<${midTag}/>\`\`\`):`
}
};

View file

@ -7,62 +7,6 @@ import { ReactNode } from "react"
import { VoidCodeEditor, VoidCodeEditorProps } from '../util/inputs.js';
/**
 * Maps a lowercase lookup key to a VS Code language id, eg "ts" -> "typescript".
 * The key is the text after the last "." of a file's base name, or the whole
 * base name when it has no "." (which is how "dockerfile" can match).
 */
const extensionMap: { [key: string]: string } = {
	// Web
	'html': 'html',
	'htm': 'html',
	'css': 'css',
	'scss': 'scss',
	'less': 'less',
	'js': 'javascript',
	'jsx': 'javascript',
	'ts': 'typescript',
	'tsx': 'typescript',
	'json': 'json',
	'jsonc': 'json',

	// Programming Languages
	'py': 'python',
	'java': 'java',
	'cpp': 'cpp',
	'cc': 'cpp',
	'h': 'cpp',
	'hpp': 'cpp',
	'cs': 'csharp',
	'go': 'go',
	'rs': 'rust',
	'rb': 'ruby',
	'php': 'php',
	'sh': 'shell',
	'bash': 'shell',
	'zsh': 'shell',

	// Markup/Config
	'md': 'markdown',
	'markdown': 'markdown',
	'xml': 'xml',
	'svg': 'xml',
	'yaml': 'yaml',
	'yml': 'yaml',
	'ini': 'ini',
	'toml': 'ini',

	// Other
	'sql': 'sql',
	'graphql': 'graphql',
	'gql': 'graphql',
	'dockerfile': 'dockerfile',
	'docker': 'dockerfile',
};

/**
 * Detects the VS Code language id for a file from its name.
 *
 * @param fileName A bare file name or a full path using "/" or "\" separators.
 * @returns The language id, or 'plaintext' when the name is not recognised.
 */
export function getLanguageFromFileName(fileName: string): string {
	// Use only the last path segment so directories (and dots inside their
	// names) cannot corrupt the lookup key.
	const basename = fileName.split(/[\\/]/).pop() ?? '';
	const key = basename.toLowerCase().split('.').pop();
	if (!key) return 'plaintext';

	// Own-property check keeps inherited Object.prototype members
	// (eg "constructor") from being returned as a language.
	return Object.prototype.hasOwnProperty.call(extensionMap, key)
		? extensionMap[key]
		: 'plaintext';
}
export const BlockCode = ({ buttonsOnHover, ...codeEditorProps }: { buttonsOnHover?: React.ReactNode } & VoidCodeEditorProps) => {
const isSingleLine = !codeEditorProps.initValue.includes('\n')

View file

@ -7,6 +7,7 @@ import React, { JSX, useCallback, useEffect, useState } from 'react'
import { marked, MarkedToken, Token } from 'marked'
import { BlockCode } from './BlockCode.js'
import { useAccessor } from '../util/services.js'
import { nameToVscodeLanguage } from '../../../helpers/detectLanguage.js'
enum CopyButtonState {
@ -20,7 +21,6 @@ const COPY_FEEDBACK_TIMEOUT = 1000 // amount of time to say 'Copied!'
const CodeButtonsOnHover = ({ text }: { text: string }) => {
const accessor = useAccessor()
const [copyButtonState, setCopyButtonState] = useState(CopyButtonState.Copy)
const inlineDiffService = accessor.get('IInlineDiffsService')
const clipboardService = accessor.get('IClipboardService')
@ -71,7 +71,7 @@ const CodeButtonsOnHover = ({ text }: { text: string }) => {
}
export const CodeSpan = ({ children, className }: { children: React.ReactNode, className?: string }) => {
return <code className={`text-vscode-text-preformat-fg bg-vscode-text-preformat-bg px-1 rounded-sm font-mono ${className}`}>
return <code className={`text-vscode-text-preformat-fg bg-vscode-text-preformat-bg px-1 rounded-sm font-mono break-all ${className}`}>
{children}
</code>
}
@ -88,7 +88,7 @@ const RenderToken = ({ token, nested = false }: { token: Token | string, nested?
if (t.type === "code") {
return <BlockCode
initValue={t.text}
// language={t.lang} // instead use vscode to detect language
language={t.lang && nameToVscodeLanguage[t.lang]} // use vscode to detect language
buttonsOnHover={<CodeButtonsOnHover text={t.text} />}
/>
}

View file

@ -9,7 +9,7 @@ import React, { ButtonHTMLAttributes, FormEvent, FormHTMLAttributes, Fragment, u
import { useAccessor, useSidebarState, useThreadsState } from '../util/services.js';
import { ChatMessage, CodeSelection, CodeStagingSelection, IThreadHistoryService } from '../../../threadHistoryService.js';
import { BlockCode, getLanguageFromFileName } from '../markdown/BlockCode.js';
import { BlockCode } from '../markdown/BlockCode.js';
import { ChatMarkdownRender } from '../markdown/ChatMarkdownRender.js';
import { URI } from '../../../../../../../base/common/uri.js';
import { EndOfLinePreference } from '../../../../../../../editor/common/model.js';
@ -27,6 +27,7 @@ import { SidebarThreadSelector } from './SidebarThreadSelector.js';
import { useScrollbarStyles } from '../util/useScrollbarStyles.js';
import { VOID_CTRL_L_ACTION_ID } from '../../../actionIDs.js';
import { ArrowBigLeftDash, CopyX, Delete, FileX2, SquareX, X } from 'lucide-react';
import { filenameToVscodeLanguage } from '../../../helpers/detectLanguage.js';
const IconX = ({ size, className = '', ...props }: { size: number, className?: string } & React.SVGProps<SVGSVGElement>) => {
@ -376,7 +377,7 @@ export const SelectedFiles = (
>
<BlockCode
initValue={selection.selectionStr!}
language={getLanguageFromFileName(selection.fileURI.path)}
language={filenameToVscodeLanguage(selection.fileURI.path)}
maxHeight={100}
showScrollbars={true}
/>
@ -417,14 +418,14 @@ const ChatBubble = ({ chatMessage, isLoading }: {
return <div
// align chatbubble according to role
className={`
${role === 'user' ? 'px-4 self-end w-fit max-w-full' : ''}
${role === 'assistant' ? 'self-start w-full max-w-full' : ''}
${role === 'user' ? `px-2 self-end w-fit max-w-full` : ''}
${role === 'assistant' ? `px-2 self-start w-full max-w-full` : ''}
`}
>
<div
// style chatbubble according to role
className={`
p-4 text-left space-y-2 rounded-lg
p-2 text-left space-y-2 rounded-lg
overflow-x-auto max-w-full
${role === 'user' ? 'bg-vscode-input-bg text-vscode-input-fg' : ''}
`}

View file

@ -489,8 +489,8 @@ const normalizeIndentation = (code: string): string => {
}
export type VoidCodeEditorProps = { initValue: string, language?: string, maxHeight?: number, showScrollbars?: boolean }
export const VoidCodeEditor = ({ initValue, language, maxHeight, showScrollbars }: VoidCodeEditorProps) => {
export type VoidCodeEditorProps = { initValue: string, language?: string, maxHeight?: number, showScrollbars?: boolean, placeholderLanguage?: string }
export const VoidCodeEditor = ({ initValue, language, maxHeight, showScrollbars, placeholderLanguage }: VoidCodeEditorProps) => {
// default settings
const MAX_HEIGHT = maxHeight ?? Infinity;
@ -573,7 +573,12 @@ export const VoidCodeEditor = ({ initValue, language, maxHeight, showScrollbars
onDidChange: () => ({
dispose: () => { }
})
} : null
} : {
languageId: placeholderLanguage ?? '',
onDidChange: () => ({
dispose: () => { }
})
}
);
editor.setModel(model);