Better diff algorithm

This commit is contained in:
Mathew P 2024-10-13 00:15:41 -07:00
parent 496c0178de
commit 1557a44934
7 changed files with 235 additions and 140 deletions

View file

@ -9,11 +9,13 @@
"version": "0.0.1",
"dependencies": {
"@anthropic-ai/sdk": "^0.27.1",
"diff-match-patch": "^1.0.5",
"openai": "^4.57.0"
},
"devDependencies": {
"@eslint/js": "^9.9.1",
"@types/diff": "^5.2.2",
"@types/diff-match-patch": "^1.0.36",
"@types/jest": "^29.5.12",
"@types/mocha": "^10.0.8",
"@types/node": "^22.5.1",
@ -1001,6 +1003,12 @@
"dev": true,
"license": "MIT"
},
"node_modules/@types/diff-match-patch": {
"version": "1.0.36",
"resolved": "https://registry.npmjs.org/@types/diff-match-patch/-/diff-match-patch-1.0.36.tgz",
"integrity": "sha512-xFdR6tkm0MWvBfO8xXCSsinYxHcqkQUlcHeSpMC2ukzOb6lwQAfDmW+Qt0AvlGd8HpsS28qKsB+oPeJn9I39jg==",
"dev": true
},
"node_modules/@types/estree": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz",
@ -2546,6 +2554,11 @@
"node": ">=0.3.1"
}
},
"node_modules/diff-match-patch": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/diff-match-patch/-/diff-match-patch-1.0.5.tgz",
"integrity": "sha512-IayShXAgj/QMXgB0IWmKx+rOPuGMhqm5w6jvFxmVenXKIzRqTAAsbBPT3kWQeGANj3jGgvcvv4yK6SxqYmikgw=="
},
"node_modules/diff-sequences": {
"version": "29.6.3",
"resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz",

View file

@ -135,6 +135,7 @@
"devDependencies": {
"@eslint/js": "^9.9.1",
"@types/diff": "^5.2.2",
"@types/diff-match-patch": "^1.0.36",
"@types/jest": "^29.5.12",
"@types/mocha": "^10.0.8",
"@types/node": "^22.5.1",
@ -165,6 +166,7 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.27.1",
"diff-match-patch": "^1.0.5",
"openai": "^4.57.0"
}
}

View file

@ -1,6 +1,6 @@
import * as vscode from 'vscode';
import { getDiffedLines, SuggestedDiff } from './getDiffedLines';
import { Diff, DiffArea } from './shared_types';
import { findDiffs } from './findDiffs';
import { Diff, DiffArea, DiffBlock } from './shared_types';
@ -142,7 +142,7 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider {
const currentCode = editor.document.getText(new vscode.Range(diffArea.startLine, 0, diffArea.endLine, Number.MAX_SAFE_INTEGER)).replace(/\r\n/g, '\n')
// compute the diffs
const diffs = getDiffedLines(diffArea.originalCode, currentCode)
const diffs = findDiffs(diffArea.originalCode, currentCode)
// print diffs
console.log('!CODEBefore:', JSON.stringify(diffArea.originalCode))
@ -152,15 +152,27 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider {
this.addDiffs(editor.document.uri, diffs)
for (const diff of this._diffsOfDocument[docUriStr]) {
console.log('originalCodeDiff:', JSON.stringify(diff.originalCode))
console.log('greenCodeDiff:', JSON.stringify(editor.document.getText(diff.greenRange).replace(/\r\n/g, '\n')))
console.log('------------')
console.log('deletedCode:', JSON.stringify(diff.deletedCode))
console.log('insertedCode:', JSON.stringify(diff.insertedCode))
console.log('deletedRange:', diff.deletedRange.start.line, diff.deletedRange.end.line,)
console.log('insertedRange:', diff.insertedRange.start.line, diff.insertedRange.end.line,)
}
}
// update highlighting
editor.setDecorations(greenDecoration, this._diffsOfDocument[docUriStr].map(diff => diff.greenRange))
// update green highlighting
editor.setDecorations(
greenDecoration,
(this._diffsOfDocument[docUriStr]
.filter(diff => diff.insertedRange !== undefined)
.map(diff => diff.insertedRange)
)
);
// TODO update red highlighting
// this._diffsOfDocument[docUriStr].map(diff => diff.deletedCode)
// update code lenses
this._onDidChangeCodeLenses.fire()
@ -168,7 +180,7 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider {
}
// used by us only
public addDiffs(docUri: vscode.Uri, diffs: SuggestedDiff[]) {
public addDiffs(docUri: vscode.Uri, diffs: DiffBlock[]) {
const docUriStr = docUri.toString()
@ -176,43 +188,17 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider {
if (!this._diffsOfDocument[docUriStr])
this._diffsOfDocument[docUriStr] = []
// 1. convert suggested diffs (which are described using line numbers) into actual diffs (described using vscode.Range, vscode.Uri)
// must do this before adding codelenses or highlighting so that codelens and highlights will apply to the fresh code and not the old code
// apply changes in reverse order so additions don't push down the line numbers of the next edit
let workspaceEdit = new vscode.WorkspaceEdit();
// add each diff and its codelens to the document
for (let i = diffs.length - 1; i > -1; i -= 1) {
let suggestedDiff = diffs[i]
let greenRange: vscode.Range
// INSERTIONS (e.g. {originalStartLine: 0, originalEndLine: -1})
if (suggestedDiff.originalStartLine > suggestedDiff.originalEndLine) {
const originalPosition = new vscode.Position(suggestedDiff.originalStartLine, 0)
workspaceEdit.insert(docUri, originalPosition, suggestedDiff.afterCode + '\n') // add back in the line we deleted when we made the startline->endline range go negative
greenRange = new vscode.Range(suggestedDiff.startLine, 0, suggestedDiff.endLine + 1, 0)
}
// DELETIONS
else if (suggestedDiff.startLine > suggestedDiff.endLine) {
const deleteRange = new vscode.Range(suggestedDiff.originalStartLine, 0, suggestedDiff.originalEndLine + 1, 0)
workspaceEdit.delete(docUri, deleteRange)
greenRange = new vscode.Range(suggestedDiff.startLine, 0, suggestedDiff.startLine, 0)
suggestedDiff.beforeCode += '\n' // add back in the line we deleted when we made the startline->endline range go negative
}
// REPLACEMENTS
else {
const originalRange = new vscode.Range(suggestedDiff.originalStartLine, 0, suggestedDiff.originalEndLine, Number.MAX_SAFE_INTEGER)
workspaceEdit.replace(docUri, originalRange, suggestedDiff.afterCode)
greenRange = new vscode.Range(suggestedDiff.startLine, 0, suggestedDiff.endLine, Number.MAX_SAFE_INTEGER)
}
this._diffsOfDocument[docUriStr].push({
...suggestedDiff,
diffid: this._diffidPool,
greenRange: greenRange,
originalCode: suggestedDiff.beforeCode,
// originalCode: suggestedDiff.deletedText,
lenses: [
new vscode.CodeLens(greenRange, { title: 'Accept', command: 'void.acceptDiff', arguments: [{ diffid: this._diffidPool }] }),
new vscode.CodeLens(greenRange, { title: 'Reject', command: 'void.rejectDiff', arguments: [{ diffid: this._diffidPool }] })
new vscode.CodeLens(suggestedDiff.insertedRange, { title: 'Accept', command: 'void.acceptDiff', arguments: [{ diffid: this._diffidPool }] }),
new vscode.CodeLens(suggestedDiff.insertedRange, { title: 'Reject', command: 'void.rejectDiff', arguments: [{ diffid: this._diffidPool }] })
]
});
this._diffidPool += 1
@ -261,17 +247,17 @@ export class DisplayChangesProvider implements vscode.CodeLensProvider {
return
}
const { greenRange: range, lenses, originalCode } = this._diffsOfDocument[docUriStr][index] // do this before we splice and mess up index
const { insertedRange: range, lenses, deletedCode } = this._diffsOfDocument[docUriStr][index] // do this before we splice and mess up index
// remove this diff from the diffsOfDocument[docStr] (can change this behavior in future if add something like history)
this._diffsOfDocument[docUriStr].splice(index, 1)
// clear the decoration in this diffs range
editor.setDecorations(greenDecoration, this._diffsOfDocument[docUriStr].map(diff => diff.greenRange))
// editor.setDecorations(greenDecoration, this._diffsOfDocument[docUriStr].map(diff => diff.insertionRange))
// REVERT THE CHANGE (this is the only part that's different from acceptDiff)
let workspaceEdit = new vscode.WorkspaceEdit();
workspaceEdit.replace(docUri, range, originalCode);
// workspaceEdit.replace(docUri, range, deletedCode);
this._weAreEditing = true
await vscode.workspace.applyEdit(workspaceEdit)
await vscode.workspace.save(docUri)

View file

@ -57,7 +57,7 @@ export function activate(context: vscode.ExtensionContext) {
// vscode.commands.executeCommand('vscode.moveViewToPanel', CustomViewProvider.viewId); // move to aux bar
// get the text the user is selecting
const selectionStr = editor.document.getText(editor.selection);
const selectionStr = editor.document.getText(editor.selection);
// get the range of the selection
const selectionRange = editor.selection;

View file

@ -0,0 +1,176 @@
import * as vscode from 'vscode';
// import { diffLines, Change } from 'diff';
import { DiffBlock } from './shared_types';
import { diff_match_patch } from 'diff-match-patch';
/**
 * Computes a line-granularity diff between two texts with diff-match-patch.
 *
 * Both texts are first encoded by `diff_linesToChars_`, the encoded strings are
 * diffed with `diff_main`, and the result is decoded back into line text in
 * place by `diff_charsToLines_`.
 *
 * @param text1 The old text.
 * @param text2 The new text.
 * @returns The diff tuples produced by `diff_main`, as `[operation, text]`
 *          pairs where operation is -1 (delete), 0 (equal) or 1 (insert).
 */
const diffLines = (text1: string, text2: string) => {
	const dmp = new diff_match_patch();
	const { chars1, chars2, lineArray } = dmp.diff_linesToChars_(text1, text2);
	const lineDiffs = dmp.diff_main(chars1, chars2, false);
	// decodes in place: each tuple's text becomes the original line text again
	dmp.diff_charsToLines_(lineDiffs, lineArray);
	// semantic cleanup is intentionally left disabled:
	// dmp.diff_cleanupSemantic(lineDiffs);
	return lineDiffs;
}
// TODO use a better diff algorithm
/**
 * Groups a line-level diff of `oldText` -> `newText` into change blocks.
 *
 * Consecutive insert/delete chunks (anything between two unchanged chunks) are
 * merged into one DiffBlock. Line numbers are 0-based and are tracked
 * separately for the old text (`deleted*`) and the new text (`inserted*`).
 *
 * @param oldText The original text.
 * @param newText The current text.
 * @returns One DiffBlock per contiguous run of changes, in document order.
 *          Returns an empty array when the texts do not differ.
 */
export const findDiffs = (oldText: string, newText: string): DiffBlock[] => {
	const diffs = diffLines(oldText, newText);
	const blocks: DiffBlock[] = [];

	// state of the block currently being accumulated
	let reprBlock: string[] = [];
	let deletedBlock: string[] = [];
	let insertedBlock: string[] = [];

	// line cursors into the new text (inserted*) and the old text (deleted*)
	let insertedLine = 0;
	let deletedLine = 0;
	let insertedStart = 0;
	let deletedStart = 0;

	// Prefixes every line of a chunk. A chunk that ends with '\n' would otherwise
	// produce a dangling prefix-only line, so the terminator is dropped first.
	const prefixLines = (text: string, prefix: string): string => {
		const body = text.endsWith('\n') ? text.slice(0, -1) : text;
		return body.split('\n').map(line => `${prefix}${line}`).join('\n');
	};

	// Starts a new block on the first changed chunk after an unchanged one.
	// Both start lines are captured here so that a block with only insertions
	// (or only deletions) does not inherit a stale start from an earlier block.
	const beginBlockIfNeeded = () => {
		if (reprBlock.length !== 0) return;
		reprBlock.push('@@@@');
		insertedStart = insertedLine;
		deletedStart = deletedLine;
	};

	// Emits the pending block (if any) and resets the accumulators. Used both
	// when an unchanged chunk is reached and at the end of the input, so both
	// paths build the block the same way.
	const flushBlock = () => {
		if (insertedBlock.length > 0 || deletedBlock.length > 0) {
			blocks.push({
				// header and prefixed chunks are separate lines
				code: reprBlock.join('\n'),
				// chunks already carry their own line terminators
				deletedCode: deletedBlock.join(''),
				insertedCode: insertedBlock.join(''),
				// NOTE(review): the end line is the cursor position after the block, so for
				// chunks ending in '\n' the range also covers the following line — confirm
				// whether consumers expect an inclusive or exclusive end.
				deletedRange: new vscode.Range(deletedStart, 0, deletedLine, Number.MAX_SAFE_INTEGER),
				insertedRange: new vscode.Range(insertedStart, 0, insertedLine, Number.MAX_SAFE_INTEGER),
			});
		}
		reprBlock = [];
		deletedBlock = [];
		insertedBlock = [];
	};

	for (const [operation, text] of diffs) {
		// number of line terminators in this chunk = how far the cursor advances
		const newlineCount = text.split('\n').length - 1;

		switch (operation) {
			// insertion: advances only the new-text cursor
			case 1:
				beginBlockIfNeeded();
				insertedLine += newlineCount;
				insertedBlock.push(text);
				reprBlock.push(prefixLines(text, '+ '));
				break;

			// deletion: advances only the old-text cursor
			case -1:
				beginBlockIfNeeded();
				deletedLine += newlineCount;
				deletedBlock.push(text);
				reprBlock.push(prefixLines(text, '- '));
				break;

			// no change: closes the pending block, then advances both cursors
			case 0:
				flushBlock();
				insertedLine += newlineCount;
				deletedLine += newlineCount;
				break;
		}
	}

	// a change that runs to the end of the text has no trailing unchanged chunk
	flushBlock();

	return blocks;
};
// export const findDiffs = (oldText: string, newText: string): DiffBlock[] => {
// const diffs = diffLines(oldText, newText);
// const blocks: DiffBlock[] = [];
// let reprBlock: string[] = [];
// let deletedBlock: string[] = [];
// let insertedBlock: string[] = [];
// let insertedEnd = 0;
// let deletedEnd = 0;
// let insertedStart = 0;
// let deletedStart = 0;
// diffs.forEach(part => {
// part.count = part.count ?? 0
// // if the part is an addition or deletion, add it to the current block
// if (part.added || part.removed) {
// if (reprBlock.length === 0) { reprBlock.push('@@@@'); }
// if (part.added) {
// if (insertedBlock.length === 0) insertedStart = insertedEnd;
// insertedEnd += part.count
// insertedBlock.push(part.value);
// reprBlock.push(part.value.split('\n').map(line => `+ ${line}`).join('\n'));
// }
// if (part.removed) {
// if (deletedBlock.length === 0) deletedStart = deletedEnd;
// deletedEnd += part.count
// deletedBlock.push(part.value);
// reprBlock.push(part.value.split('\n').map(line => `- ${line}`).join('\n'));
// }
// }
// // if the part is unchanged, finalize the block and add it to the array
// else {
// // if the block is not null, add it to the array
// if (insertedBlock.length > 0 || deletedBlock.length > 0) {
// blocks.push({
// code: reprBlock.join('\n'),
// deletedCode: deletedBlock.join(''),
// insertedCode: insertedBlock.join(''),
// deletedRange: new vscode.Range(deletedStart, 0, deletedEnd, Number.MAX_SAFE_INTEGER),
// insertedRange: new vscode.Range(insertedStart, 0, insertedEnd, Number.MAX_SAFE_INTEGER),
// });
// }
// // update block variables
// reprBlock = [];
// deletedBlock = [];
// insertedBlock = [];
// insertedEnd += part.count;
// deletedEnd += part.count;
// }
// })
// // finally, add the last block to the array
// if (insertedBlock.length > 0 || deletedBlock.length > 0) {
// blocks.push({
// code: reprBlock.join('\n'),
// deletedCode: deletedBlock.join(''),
// insertedCode: insertedBlock.join(''),
// deletedRange: new vscode.Range(deletedStart, 0, deletedEnd, Number.MAX_SAFE_INTEGER),
// insertedRange: new vscode.Range(insertedStart, 0, insertedEnd, Number.MAX_SAFE_INTEGER),
// });
// }
// return blocks;
// }

View file

@ -1,93 +0,0 @@
import { diffLines, Change } from 'diff';
// One contiguous changed region as computed by getDiffedLines.
// All line numbers are 0-based and inclusive; an end line of (start - 1) denotes an empty range on that side.
export type SuggestedDiff = {
// first/last changed line in the current (new) text
startLine: number,
endLine: number,
// first/last changed line in the original (old) text
originalStartLine: number,
originalEndLine: number,
// lines originalStartLine..originalEndLine of the old text, joined with '\n'
beforeCode: string;
// lines startLine..endLine of the new text, joined with '\n'
afterCode: string;
}
/**
 * Computes the contiguous changed regions between two texts.
 *
 * Line endings are normalized to '\n' first. The line diff is then walked in
 * order; every run of added/removed parts between two unchanged parts becomes
 * one SuggestedDiff carrying the affected line ranges and text on both sides.
 *
 * @param oldStr The original text.
 * @param newStr The current text.
 * @returns The changed regions in document order.
 */
export function getDiffedLines(oldStr: string, newStr: string) {
	const normalizedOld = oldStr.replace(/\r\n/g, '\n')
	const normalizedNew = newStr.replace(/\r\n/g, '\n')
	const oldLines = normalizedOld.split('\n')
	const newLines = normalizedNew.split('\n')

	const changes: Change[] = diffLines(normalizedOld, normalizedNew)
	// trailing sentinel "unchanged" part so a streak that reaches the end of the text is still flushed
	changes.push({ value: '' })

	const result: SuggestedDiff[] = []
	let oldCursor = 0
	let newCursor = 0
	// where the streak currently being accumulated began, if there is one
	let pending: { newStart: number, oldStart: number } | undefined = undefined

	for (const change of changes) {
		const lineCount = change.count ?? 0

		// added or removed part: open a streak if needed and advance one side only
		if (change.added || change.removed) {
			if (pending === undefined) {
				pending = { newStart: newCursor, oldStart: oldCursor }
			}
			if (change.removed) {
				oldCursor += lineCount
			} else {
				newCursor += lineCount
			}
			continue
		}

		// unchanged part: close the open streak; its ranges stop just before the current line
		if (pending !== undefined) {
			const startLine = pending.newStart
			const endLine = newCursor - 1
			const originalStartLine = pending.oldStart
			const originalEndLine = oldCursor - 1
			const afterCode = newLines.slice(startLine, endLine + 1).join('\n')
			const beforeCode = oldLines.slice(originalStartLine, originalEndLine + 1).join('\n')
			const entry: SuggestedDiff = { beforeCode, afterCode, startLine, endLine, originalStartLine, originalEndLine, }
			result.push(entry)
			pending = undefined
		}
		oldCursor += lineCount
		newCursor += lineCount
	}

	return result
}

View file

@ -2,6 +2,8 @@
import * as vscode from 'vscode';
import { ApiConfig } from './common/sendLLMMessage';
// A selection is a frozen snapshot: the selected text, the range it covered, and the URI of the file it came from.
type CodeSelection = { selectionStr: string, selectionRange: vscode.Range, filePath: vscode.Uri }
@ -14,13 +16,20 @@ type DiffArea = {
originalCode: string
}
// each diff on the user's screen right now
// the return type of diff creator
type DiffBlock = {
// printable form of the block: an '@@@@' header followed by the changed lines prefixed with '+ ' (inserted) or '- ' (deleted)
code: string;
// line range of the removed text, counted in the old text
deletedRange: vscode.Range;
// the removed text itself
deletedCode: string;
// line range of the added text, counted in the new text
insertedRange: vscode.Range;
// the added text itself
insertedCode: string;
}
// each diff on the user's screen
type Diff = {
diffid: number,
lenses: vscode.CodeLens[],
greenRange: vscode.Range,
originalCode: string, // If a revert happens, we replace the greenRange with this content.
}
} & DiffBlock
type WebviewMessage = (
@ -44,9 +53,11 @@ type WebviewMessage = (
)
type Command = WebviewMessage['type']
export {
DiffBlock,
CodeSelection,
File,
WebviewMessage,