From 1557a44934d99484854aa0774e1f7f40172cf0ab Mon Sep 17 00:00:00 2001 From: Mathew P Date: Sun, 13 Oct 2024 00:15:41 -0700 Subject: [PATCH] Better diff algorithm --- extensions/void/package-lock.json | 13 ++ extensions/void/package.json | 2 + extensions/void/src/DisplayChangesProvider.ts | 70 +++---- extensions/void/src/extension.ts | 2 +- extensions/void/src/findDiffs.ts | 176 ++++++++++++++++++ extensions/void/src/getDiffedLines.ts | 93 --------- extensions/void/src/shared_types.ts | 19 +- 7 files changed, 235 insertions(+), 140 deletions(-) create mode 100644 extensions/void/src/findDiffs.ts delete mode 100644 extensions/void/src/getDiffedLines.ts diff --git a/extensions/void/package-lock.json b/extensions/void/package-lock.json index 2f929277..0bc6d47d 100644 --- a/extensions/void/package-lock.json +++ b/extensions/void/package-lock.json @@ -9,11 +9,13 @@ "version": "0.0.1", "dependencies": { "@anthropic-ai/sdk": "^0.27.1", + "diff-match-patch": "^1.0.5", "openai": "^4.57.0" }, "devDependencies": { "@eslint/js": "^9.9.1", "@types/diff": "^5.2.2", + "@types/diff-match-patch": "^1.0.36", "@types/jest": "^29.5.12", "@types/mocha": "^10.0.8", "@types/node": "^22.5.1", @@ -1001,6 +1003,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/diff-match-patch": { + "version": "1.0.36", + "resolved": "https://registry.npmjs.org/@types/diff-match-patch/-/diff-match-patch-1.0.36.tgz", + "integrity": "sha512-xFdR6tkm0MWvBfO8xXCSsinYxHcqkQUlcHeSpMC2ukzOb6lwQAfDmW+Qt0AvlGd8HpsS28qKsB+oPeJn9I39jg==", + "dev": true + }, "node_modules/@types/estree": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", @@ -2546,6 +2554,11 @@ "node": ">=0.3.1" } }, + "node_modules/diff-match-patch": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/diff-match-patch/-/diff-match-patch-1.0.5.tgz", + "integrity": "sha512-IayShXAgj/QMXgB0IWmKx+rOPuGMhqm5w6jvFxmVenXKIzRqTAAsbBPT3kWQeGANj3jGgvcvv4yK6SxqYmikgw==" + }, "node_modules/diff-sequences": { "version": "29.6.3", "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", diff --git a/extensions/void/package.json b/extensions/void/package.json index 809329dd..ee04b8fe 100644 --- a/extensions/void/package.json +++ b/extensions/void/package.json @@ -135,6 +135,7 @@ "devDependencies": { "@eslint/js": "^9.9.1", "@types/diff": "^5.2.2", + "@types/diff-match-patch": "^1.0.36", "@types/jest": "^29.5.12", "@types/mocha": "^10.0.8", "@types/node": "^22.5.1", @@ -165,6 +166,7 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.27.1", + "diff-match-patch": "^1.0.5", "openai": "^4.57.0" } } diff --git a/extensions/void/src/DisplayChangesProvider.ts b/extensions/void/src/DisplayChangesProvider.ts index 6099326e..7b83a359 100644 --- a/extensions/void/src/DisplayChangesProvider.ts +++ b/extensions/void/src/DisplayChangesProvider.ts @@ -1,6 +1,6 @@ import * as vscode from 'vscode'; -import { getDiffedLines, SuggestedDiff } from './getDiffedLines'; -import { Diff, DiffArea } from './shared_types'; +import { findDiffs } from './findDiffs'; +import { Diff, DiffArea, DiffBlock } from './shared_types'; @@ -142,7 +142,7 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider { const currentCode = editor.document.getText(new vscode.Range(diffArea.startLine, 0, diffArea.endLine, Number.MAX_SAFE_INTEGER)).replace(/\r\n/g, '\n') // compute the diffs - const diffs = getDiffedLines(diffArea.originalCode, currentCode) + const diffs = findDiffs(diffArea.originalCode, currentCode) // print diffs console.log('!CODEBefore:', JSON.stringify(diffArea.originalCode)) @@ -152,15 +152,27 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider { this.addDiffs(editor.document.uri, diffs) for (const diff of this._diffsOfDocument[docUriStr]) { - console.log('originalCodeDiff:', JSON.stringify(diff.originalCode)) - console.log('greenCodeDiff:', JSON.stringify(editor.document.getText(diff.greenRange).replace(/\r\n/g, '\n'))) + console.log('------------') + console.log('deletedCode:', JSON.stringify(diff.deletedCode)) + console.log('insertedCode:', JSON.stringify(diff.insertedCode)) + console.log('deletedRange:', diff.deletedRange.start.line, diff.deletedRange.end.line,) + console.log('insertedRange:', diff.insertedRange.start.line, diff.insertedRange.end.line,) } } - // update highlighting - editor.setDecorations(greenDecoration, this._diffsOfDocument[docUriStr].map(diff => diff.greenRange)) + // update green highlighting + editor.setDecorations( + greenDecoration, + (this._diffsOfDocument[docUriStr] + .filter(diff => diff.insertedRange !== undefined) + .map(diff => diff.insertedRange) + ) + ); + + // TODO update red highlighting + // this._diffsOfDocument[docUriStr].map(diff => diff.deletedCode) // update code lenses this._onDidChangeCodeLenses.fire() @@ -168,7 +180,7 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider { } // used by us only - public addDiffs(docUri: vscode.Uri, diffs: SuggestedDiff[]) { + public addDiffs(docUri: vscode.Uri, diffs: DiffBlock[]) { const docUriStr = docUri.toString() @@ -176,43 +188,17 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider { if (!this._diffsOfDocument[docUriStr]) this._diffsOfDocument[docUriStr] = [] - - // 1. convert suggested diffs (which are described using line numbers) into actual diffs (described using vscode.Range, vscode.Uri) - // must do this before adding codelenses or highlighting so that codelens and highlights will apply to the fresh code and not the old code - // apply changes in reverse order so additions don't push down the line numbers of the next edit - let workspaceEdit = new vscode.WorkspaceEdit(); + // add each diff and its codelens to the document for (let i = diffs.length - 1; i > -1; i -= 1) { let suggestedDiff = diffs[i] - let greenRange: vscode.Range - - // INSERTIONS (e.g. {originalStartLine: 0, originalEndLine: -1}) - if (suggestedDiff.originalStartLine > suggestedDiff.originalEndLine) { - const originalPosition = new vscode.Position(suggestedDiff.originalStartLine, 0) - workspaceEdit.insert(docUri, originalPosition, suggestedDiff.afterCode + '\n') // add back in the line we deleted when we made the startline->endline range go negative - greenRange = new vscode.Range(suggestedDiff.startLine, 0, suggestedDiff.endLine + 1, 0) - } - // DELETIONS - else if (suggestedDiff.startLine > suggestedDiff.endLine) { - const deleteRange = new vscode.Range(suggestedDiff.originalStartLine, 0, suggestedDiff.originalEndLine + 1, 0) - workspaceEdit.delete(docUri, deleteRange) - greenRange = new vscode.Range(suggestedDiff.startLine, 0, suggestedDiff.startLine, 0) - suggestedDiff.beforeCode += '\n' // add back in the line we deleted when we made the startline->endline range go negative - } - // REPLACEMENTS - else { - const originalRange = new vscode.Range(suggestedDiff.originalStartLine, 0, suggestedDiff.originalEndLine, Number.MAX_SAFE_INTEGER) - workspaceEdit.replace(docUri, originalRange, suggestedDiff.afterCode) - greenRange = new vscode.Range(suggestedDiff.startLine, 0, suggestedDiff.endLine, Number.MAX_SAFE_INTEGER) - } - this._diffsOfDocument[docUriStr].push({ + ...suggestedDiff, diffid: this._diffidPool, - greenRange: greenRange, - originalCode: suggestedDiff.beforeCode, + // originalCode: suggestedDiff.deletedText, lenses: [ - new vscode.CodeLens(greenRange, { title: 'Accept', command: 'void.acceptDiff', arguments: [{ diffid: this._diffidPool }] }), - new vscode.CodeLens(greenRange, { title: 'Reject', command: 'void.rejectDiff', arguments: [{ diffid: this._diffidPool }] }) + new vscode.CodeLens(suggestedDiff.insertedRange, { title: 'Accept', command: 'void.acceptDiff', arguments: [{ diffid: this._diffidPool }] }), + new vscode.CodeLens(suggestedDiff.insertedRange, { title: 'Reject', command: 'void.rejectDiff', arguments: [{ diffid: this._diffidPool }] }) ] }); this._diffidPool += 1 @@ -261,17 +247,17 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider { return } - const { greenRange: range, lenses, originalCode } = this._diffsOfDocument[docUriStr][index] // do this before we splice and mess up index + const { insertedRange: range, lenses, deletedCode } = this._diffsOfDocument[docUriStr][index] // do this before we splice and mess up index // remove this diff from the diffsOfDocument[docStr] (can change this behavior in future if add something like history) this._diffsOfDocument[docUriStr].splice(index, 1) // clear the decoration in this diffs range - editor.setDecorations(greenDecoration, this._diffsOfDocument[docUriStr].map(diff => diff.greenRange)) + // editor.setDecorations(greenDecoration, this._diffsOfDocument[docUriStr].map(diff => diff.insertionRange)) // REVERT THE CHANGE (this is the only part that's different from acceptDiff) let workspaceEdit = new vscode.WorkspaceEdit(); - workspaceEdit.replace(docUri, range, originalCode); + // workspaceEdit.replace(docUri, range, deletedCode); this._weAreEditing = true await vscode.workspace.applyEdit(workspaceEdit) await vscode.workspace.save(docUri) diff --git a/extensions/void/src/extension.ts b/extensions/void/src/extension.ts index d2da0238..21cc3dfb 100644 --- a/extensions/void/src/extension.ts +++ b/extensions/void/src/extension.ts @@ -57,7 +57,7 @@ export function activate(context: vscode.ExtensionContext) { // vscode.commands.executeCommand('vscode.moveViewToPanel', CustomViewProvider.viewId); // move to aux bar // get the text the user is selecting - const selectionStr = editor.document.getText(editor.selection); + const selectionStr = editor.document.getText(editor.selection);5 // get the range of the selection const selectionRange = editor.selection; diff --git a/extensions/void/src/findDiffs.ts b/extensions/void/src/findDiffs.ts new file mode 100644 index 00000000..5e456f85 --- /dev/null +++ b/extensions/void/src/findDiffs.ts @@ -0,0 +1,176 @@ + +import * as vscode from 'vscode'; +// import { diffLines, Change } from 'diff'; +import { DiffBlock } from './shared_types'; + +import { diff_match_patch } from 'diff-match-patch'; + + +const diffLines = (text1: string, text2: string) => { + var dmp = new diff_match_patch(); + var a = dmp.diff_linesToChars_(text1, text2); + var lineText1 = a.chars1; + var lineText2 = a.chars2; + var lineArray = a.lineArray; + var diffs = dmp.diff_main(lineText1, lineText2, false); + dmp.diff_charsToLines_(diffs, lineArray); + // dmp.diff_cleanupSemantic(diffs); + return diffs; +} + + +// TODO use a better diff algorithm +export const findDiffs = (oldText: string, newText: string): DiffBlock[] => { + + const diffs = diffLines(oldText, newText); + + const blocks: DiffBlock[] = []; + let reprBlock: string[] = []; + let deletedBlock: string[] = []; + let insertedBlock: string[] = []; + let insertedLine = 0; + let deletedLine = 0; + let insertedStart = 0; + let deletedStart = 0; + + diffs.forEach(([operation, text]) => { + + const lines = text.split('\n'); + + switch (operation) { + + // insertion + case 1: + if (reprBlock.length === 0) { reprBlock.push('@@@@'); } + if (insertedBlock.length === 0) insertedStart = insertedLine; + insertedLine += lines.length - 1; // Update only the line count for new text + insertedBlock.push(text); + reprBlock.push(lines.map(line => `+ ${line}`).join('\n')); + break; + + // deletion + case -1: + if (reprBlock.length === 0) { reprBlock.push('@@@@'); } + if (deletedBlock.length === 0) deletedStart = deletedLine; + deletedLine += lines.length - 1; // Update only the line count for old text + deletedBlock.push(text); + reprBlock.push(lines.map(line => `- ${line}`).join('\n')); + break; + + // no change + case 0: + // If we have a pending block, add it to the blocks array + if (insertedBlock.length > 0 || deletedBlock.length > 0) { + blocks.push({ + code: reprBlock.join(''), + deletedCode: deletedBlock.join(''), + insertedCode: insertedBlock.join(''), + deletedRange: new vscode.Range(deletedStart, 0, deletedLine, Number.MAX_SAFE_INTEGER), + insertedRange: new vscode.Range(insertedStart, 0, insertedLine, Number.MAX_SAFE_INTEGER), + }); + } + + // Reset the block variables + reprBlock = []; + deletedBlock = []; + insertedBlock = []; + + // Update line counts for unchanged text + insertedLine += lines.length - 1; + deletedLine += lines.length - 1; + + break; + } + }); + + // Add any remaining blocks after the loop ends + if (insertedBlock.length > 0 || deletedBlock.length > 0) { + blocks.push({ + code: reprBlock.join('\n'), + deletedCode: deletedBlock.join('\n'), + insertedCode: insertedBlock.join('\n'), + deletedRange: new vscode.Range(deletedStart, 0, deletedLine, Number.MAX_SAFE_INTEGER), + insertedRange: new vscode.Range(insertedStart, 0, insertedLine, Number.MAX_SAFE_INTEGER), + }); + } + + return blocks; +}; + + + +// export const findDiffs = (oldText: string, newText: string): DiffBlock[] => { + +// const diffs = diffLines(oldText, newText); + +// const blocks: DiffBlock[] = []; + +// let reprBlock: string[] = []; +// let deletedBlock: string[] = []; +// let insertedBlock: string[] = []; + +// let insertedEnd = 0; +// let deletedEnd = 0; +// let insertedStart = 0; +// let deletedStart = 0; + +// diffs.forEach(part => { + +// part.count = part.count ?? 0 + +// // if the part is an addition or deletion, add it to the current block +// if (part.added || part.removed) { +// if (reprBlock.length === 0) { reprBlock.push('@@@@'); } +// if (part.added) { +// if (insertedBlock.length === 0) insertedStart = insertedEnd; +// insertedEnd += part.count +// insertedBlock.push(part.value); +// reprBlock.push(part.value.split('\n').map(line => `+ ${line}`).join('\n')); +// } +// if (part.removed) { +// if (deletedBlock.length === 0) deletedStart = deletedEnd; +// deletedEnd += part.count +// deletedBlock.push(part.value); +// reprBlock.push(part.value.split('\n').map(line => `- ${line}`).join('\n')); +// } +// } + +// // if the part is unchanged, finalize the block and add it to the array +// else { +// // if the block is not null, add it to the array +// if (insertedBlock.length > 0 || deletedBlock.length > 0) { +// blocks.push({ +// code: reprBlock.join('\n'), +// deletedCode: deletedBlock.join(''), +// insertedCode: insertedBlock.join(''), +// deletedRange: new vscode.Range(deletedStart, 0, deletedEnd, Number.MAX_SAFE_INTEGER), +// insertedRange: new vscode.Range(insertedStart, 0, insertedEnd, Number.MAX_SAFE_INTEGER), +// }); +// } + +// // update block variables +// reprBlock = []; +// deletedBlock = []; +// insertedBlock = []; +// insertedEnd += part.count; +// deletedEnd += part.count; + +// } + +// }) + +// // finally, add the last block to the array +// if (insertedBlock.length > 0 || deletedBlock.length > 0) { +// blocks.push({ +// code: reprBlock.join('\n'), +// deletedCode: deletedBlock.join(''), +// insertedCode: insertedBlock.join(''), +// deletedRange: new vscode.Range(deletedStart, 0, deletedEnd, Number.MAX_SAFE_INTEGER), +// insertedRange: new vscode.Range(insertedStart, 0, insertedEnd, Number.MAX_SAFE_INTEGER), +// }); +// } + +// return blocks; + +// } + diff --git a/extensions/void/src/getDiffedLines.ts b/extensions/void/src/getDiffedLines.ts deleted file mode 100644 index e16f56c9..00000000 --- a/extensions/void/src/getDiffedLines.ts +++ /dev/null @@ -1,93 +0,0 @@ -import { diffLines, Change } from 'diff'; - -export type SuggestedDiff = { - // start/end of current file - startLine: number, - endLine: number, - - // start/end of original file - originalStartLine: number, - originalEndLine: number, - - // original content (originalfile[originalStart...originalEnd]) - beforeCode: string; - afterCode: string; -} - -export function getDiffedLines(oldStr: string, newStr: string) { - // an ordered list of every original line, line added to the new file, and line removed from the old file (order is unambiguous, think about it) - - // replace \r\n with \n - oldStr = oldStr.replace(/\r\n/g, '\n') - newStr = newStr.replace(/\r\n/g, '\n') - - const lineByLineChanges: Change[] = diffLines(oldStr, newStr); - - lineByLineChanges.push({ value: '' }) // add a dummy so we flush any streaks we haven't yet at the very end (!line.added && !line.removed) - - let oldFileLineNum: number = 0; - let newFileLineNum: number = 0; - - let streakStartInNewFile: number | undefined = undefined - let streakStartInOldFile: number | undefined = undefined - - let oldStrLines = oldStr.split('\n') - let newStrLines = newStr.split('\n') - - const replacements: SuggestedDiff[] = [] - - for (let line of lineByLineChanges) { - // no change on this line - if (!line.added && !line.removed) { - // if we were on a streak, add it - if (streakStartInNewFile !== undefined) { - - const startLine = streakStartInNewFile - const endLine = newFileLineNum - 1 // don't include current line, the edit was up to this line but not including it - const newContent = newStrLines.slice(startLine, endLine + 1).join('\n') - - const originalStartLine = streakStartInOldFile! - const originalEndLine = oldFileLineNum - 1 // don't include current line, the edit was up to this line but not including it - const originalContent = oldStrLines.slice(originalStartLine, originalEndLine + 1).join('\n') - - const replacement: SuggestedDiff = { beforeCode: originalContent, afterCode: newContent, startLine, endLine, originalStartLine, originalEndLine, } - - replacements.push(replacement) - streakStartInNewFile = undefined - streakStartInOldFile = undefined - } - - oldFileLineNum += line.count ?? 0; - newFileLineNum += line.count ?? 0; - } - - - // line was removed from old file - else if (line.removed) { - - // if we weren't on a streak, start one on this current line num - if (streakStartInNewFile === undefined) { - streakStartInNewFile = newFileLineNum - streakStartInOldFile = oldFileLineNum - } - - oldFileLineNum += line.count ?? 0 // we processed the line so add 1 - } - - // line was added to new file - else if (line.added) { - - // if we weren't on a streak, start one on this current line num - if (streakStartInNewFile === undefined) { - streakStartInNewFile = newFileLineNum - streakStartInOldFile = oldFileLineNum - } - - newFileLineNum += line.count ?? 0; // we processed the line so add 1 - } - - } // end for - - return replacements - -} diff --git a/extensions/void/src/shared_types.ts b/extensions/void/src/shared_types.ts index f16fa3f5..0c8dc933 100644 --- a/extensions/void/src/shared_types.ts +++ b/extensions/void/src/shared_types.ts @@ -2,6 +2,8 @@ import * as vscode from 'vscode'; import { ApiConfig } from './common/sendLLMMessage'; + + // a selection is a frozen snapshot type CodeSelection = { selectionStr: string, selectionRange: vscode.Range, filePath: vscode.Uri } @@ -14,13 +16,20 @@ type DiffArea = { originalCode: string } -// each diff on the user's screen right now +// the return type of diff creator +type DiffBlock = { + code: string; + deletedRange: vscode.Range; + deletedCode: string; + insertedRange: vscode.Range; + insertedCode: string; +} + +// each diff on the user's screen type Diff = { diffid: number, lenses: vscode.CodeLens[], - greenRange: vscode.Range, - originalCode: string, // If a revert happens, we replace the greenRange with this content. -} +} & DiffBlock type WebviewMessage = ( @@ -44,9 +53,11 @@ type WebviewMessage = ( ) + type Command = WebviewMessage['type'] export { + DiffBlock, CodeSelection, File, WebviewMessage,