fix(xlsx): invalidate formula cachedValue on sheet removal

When a sheet is removed, formulas in other sheets that still reference
the deleted sheet by name kept their stale <x:v> (cached value). Excel
itself recomputes these to #REF! on open, so officecli's Get reported a
value that Excel would never show.

- After sheet remove, walk remaining worksheets and clear CellValue on
  any formula whose body contains the removed sheet name (bare or
  single-quote wrapped).
- In CellToNode, suppress the evaluator-based cachedValue fallback when
  the formula references a sheet that no longer exists in the workbook;
  otherwise FormulaEvaluator.ResolveSheetCellResult silently returns 0
  and we report a fake cached value where Excel would show #REF!.
This commit is contained in:
zmworm 2026-04-19 02:07:54 +08:00
parent c28d1763cf
commit b38ce609e4
2 changed files with 101 additions and 1 deletions

View file

@ -1049,8 +1049,17 @@ public partial class ExcelHandler
var rawCached = cell.CellValue?.Text;
if (!string.IsNullOrEmpty(rawCached))
node.Format["cachedValue"] = rawCached;
else if (displayText != null && !displayText.StartsWith("="))
else if (displayText != null && !displayText.StartsWith("=") &&
!FormulaReferencesMissingSheet(formula))
{
// R9-1: do NOT fall back to an evaluated cachedValue when the
// formula references a sheet that no longer exists in the
// workbook. Otherwise cross-sheet refs whose target sheet
// was removed silently evaluate to "0" (see
// FormulaEvaluator.ResolveSheetCellResult), reporting a
// stale/fake cached value where Excel would show #REF!.
node.Format["cachedValue"] = displayText;
}
}
// Array formula readback — keys match Set input
if (cell.CellFormula?.FormulaType?.Value == CellFormulaValues.Array)
@ -3250,4 +3259,37 @@ public partial class ExcelHandler
}
}
}
/// <summary>
/// R9-1: scan a formula body for sheet-qualified refs (bare `Sheet1!A1`
/// or quoted `'My Data'!A1`) and return true if any referenced sheet
/// name does not exist in the current workbook. Used to suppress the
/// evaluator-based cachedValue fallback when cross-sheet refs point at
/// a removed sheet — Excel shows `#REF!` there; we should not invent a
/// "0".
/// </summary>
/// <param name="formula">Formula text to scan. Null or empty yields <c>false</c>.</param>
/// <returns>
/// <c>true</c> when at least one sheet-qualified reference names a sheet
/// that is not among the workbook's <c>Sheet</c> elements (compared
/// case-insensitively); <c>false</c> otherwise, or when the workbook is
/// unavailable.
/// </returns>
private bool FormulaReferencesMissingSheet(string formula)
{
    if (string.IsNullOrEmpty(formula)) return false;
    var wb = _doc.WorkbookPart?.Workbook;
    if (wb == null) return false;
    var names = new HashSet<string>(
        wb.Descendants<Sheet>().Select(s => s.Name?.Value ?? "").Where(n => n.Length > 0),
        StringComparer.OrdinalIgnoreCase);

    // One left-to-right pass with three alternatives, tried in this order:
    //   1. "..."       double-quoted string literal ("" is an escaped quote).
    //                  Consumed and ignored so text such as "Yes!" is not
    //                  mistaken for a reference to a sheet called Yes.
    //   2. '...'!      quoted sheet name; inner single quotes escaped as ''.
    //   3. Name!       bare sheet name: letter/underscore first, then
    //                  letters, digits, underscore or period. Unicode classes
    //                  are used so non-ASCII names are recognised and an
    //                  ASCII tail of a non-ASCII name is not split off.
    // Error literals such as #REF! still match the bare alternative (the
    // '#' is not excluded by the lookbehind) and are reported as missing.
    const string pattern =
        @"""(?:[^""]|"""")*""" +
        @"|'(?<quoted>(?:[^']|'')+)'!" +
        @"|(?<![\p{L}\p{N}_'.])(?<bare>[\p{L}_][\p{L}\p{N}_.]*)!";

    foreach (System.Text.RegularExpressions.Match m in
        System.Text.RegularExpressions.Regex.Matches(formula, pattern))
    {
        var quoted = m.Groups["quoted"];
        if (quoted.Success)
        {
            // Undo the '' escaping before comparing against real sheet names.
            if (!names.Contains(quoted.Value.Replace("''", "'"))) return true;
            continue;
        }

        var bare = m.Groups["bare"];
        if (bare.Success && !names.Contains(bare.Value)) return true;
        // Otherwise the match was a string literal: nothing to check.
    }
    return false;
}
}

View file

@ -126,6 +126,14 @@ public partial class ExcelHandler
if (!definedNames.HasChildren) definedNames.Remove();
}
// R9-1: invalidate stale cachedValue on formulas in other sheets
// that referenced the removed sheet. Real Excel would recompute
// to #REF! on open; our Get must not report the stale value.
// Minimum viable: clear <x:v> so cachedValue drops out. We leave
// the formula body alone — rewriting it to #REF! is what Excel
// does on recalc and is hard to get right.
InvalidateFormulaCacheReferencingSheet(workbookPart, sheetName);
// Fix ActiveTab to prevent workbook corruption when deleting the last tab
var remainingCount = sheets!.Elements<Sheet>().Count();
var bookViews = workbook.GetFirstChild<BookViews>();
@ -1243,6 +1251,56 @@ public partial class ExcelHandler
RegexOptions.IgnoreCase);
}
/// <summary>
/// R9-1: after a sheet is removed, walk every remaining worksheet's
/// formula cells and clear the CellValue on any formula that still
/// references the removed sheet by name (bare or single-quote wrapped).
/// We do not rewrite the formula body — that is Excel's job on recalc.
/// Clearing the cached value keeps officecli's Get from reporting a
/// value computed against a sheet that no longer exists.
/// </summary>
/// <param name="workbookPart">Workbook whose worksheet parts are scanned.</param>
/// <param name="removedSheetName">
/// Name of the sheet that was removed. Null or empty is a no-op.
/// </param>
private void InvalidateFormulaCacheReferencingSheet(WorkbookPart workbookPart, string removedSheetName)
{
    // With an empty name the bare token would be just "!", which would
    // match every sheet-qualified formula in the workbook.
    if (string.IsNullOrEmpty(removedSheetName)) return;

    // Two literal forms Excel uses for sheet-qualified refs:
    //   Sheet2!A1      (bare, no special chars)
    //   'My Data'!A1   (quoted when name has spaces/specials; an inner
    //                   single quote is written as '')
    var bareToken = removedSheetName + "!";
    var quotedToken = "'" + removedSheetName.Replace("'", "''") + "'!";

    // True when `token` occurs in `formula` at a position where it is a
    // whole sheet qualifier rather than the tail of a longer name.
    bool ContainsSheetToken(string formula, string token, bool isBare)
    {
        for (int at = formula.IndexOf(token, StringComparison.OrdinalIgnoreCase);
             at >= 0;
             at = formula.IndexOf(token, at + 1, StringComparison.OrdinalIgnoreCase))
        {
            if (at == 0) return true;
            char prev = formula[at - 1];
            if (isBare)
            {
                // "Sheet1!" inside "MySheet1!A1" belongs to another sheet.
                bool partOfLongerName =
                    char.IsLetterOrDigit(prev) || prev == '_' || prev == '.' || prev == '\'';
                if (!partOfLongerName) return true;
            }
            else if (prev != '\'')
            {
                // A preceding quote means the opening ' of the token is
                // really the second half of an escaped '' inside a longer
                // quoted name, e.g. 'My''Data'! when removing "Data".
                return true;
            }
        }
        return false;
    }

    foreach (var wsPart in workbookPart.WorksheetParts)
    {
        var worksheet = GetSheet(wsPart);
        var sheetData = worksheet.GetFirstChild<SheetData>();
        if (sheetData == null) continue;
        bool touched = false;
        foreach (var row in sheetData.Elements<Row>())
        {
            foreach (var cell in row.Elements<Cell>())
            {
                var formula = cell.CellFormula?.Text;
                if (string.IsNullOrEmpty(formula)) continue;
                if (!ContainsSheetToken(formula, bareToken, true) &&
                    !ContainsSheetToken(formula, quotedToken, false))
                    continue;
                // Remove the CellValue element so Get reports no
                // cachedValue for this cell.
                cell.CellValue?.Remove();
                touched = true;
            }
        }
        if (touched)
        {
            worksheet.Save();
        }
    }
}
/// <summary>
/// R10-2 / R2-1 shared helper. Drops a PivotTableCacheDefinitionPart and
/// its workbook-level &lt;pivotCache&gt; entry IF no remaining pivot