OfficeCLI/src/officecli/Handlers/Excel/ExcelHandler.View.cs

485 lines
19 KiB
C#
Raw Normal View History

// Copyright 2025 OfficeCli (officecli.ai)
// SPDX-License-Identifier: Apache-2.0
using System.Text;
using System.Text.Json.Nodes;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using OfficeCli.Core;
namespace OfficeCli.Handlers;
public partial class ExcelHandler
{
/// <summary>
/// Renders the workbook as plain text: a "=== Sheet: name ===" header per worksheet,
/// followed by one line per row containing the row path and the tab-separated cell values.
/// </summary>
/// <param name="startLine">1-based position of the first row to emit, counted per sheet over the
/// row elements present in the sheet data (not the row's RowIndex). Null means no lower bound.</param>
/// <param name="endLine">1-based position of the last row to emit, counted the same way. Null means no upper bound.</param>
/// <param name="maxLines">Maximum number of rows to emit across all sheets. When reached, a
/// truncation notice is appended and no further sheets are rendered.</param>
/// <param name="cols">Optional set of column names; when given, only cells whose reference column
/// is in the set are printed.</param>
/// <returns>The rendered text with trailing whitespace trimmed.</returns>
public string ViewAsText(int? startLine = null, int? endLine = null, int? maxLines = null, HashSet<string>? cols = null)
{
    var sb = new StringBuilder();
    var sheets = GetWorksheets();
    int sheetIdx = 0;
    int emitted = 0;
    bool truncated = false;
    foreach (var (sheetName, worksheetPart) in sheets)
    {
        if (truncated) break;

        // Emit the blank separator *before* every sheet except the first, so that a sheet
        // without sheet data (which takes the early `continue` below) is still separated
        // from the sheet that follows it.
        if (sheetIdx++ > 0) sb.AppendLine();

        sb.AppendLine($"=== Sheet: {sheetName} ===");
        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        var evaluator = new Core.FormulaEvaluator(sheetData, _doc.WorkbookPart);
        int lineNum = 0;
        foreach (var row in sheetData.Elements<Row>())
        {
            lineNum++;
            if (startLine.HasValue && lineNum < startLine.Value) continue;
            if (endLine.HasValue && lineNum > endLine.Value) break;
            if (maxLines.HasValue && emitted >= maxLines.Value)
            {
                // The total is only needed for the notice, so count rows only on truncation.
                int totalRows = sheetData.Elements<Row>().Count();
                sb.AppendLine($"... (showed {emitted} rows, {totalRows} total in sheet, use --start/--end to view more)");
                truncated = true;
                break;
            }

            var cellElements = row.Elements<Cell>();
            if (cols != null)
                cellElements = cellElements.Where(c => cols.Contains(ParseCellReference(c.CellReference?.Value ?? "A1").Column));
            var cells = cellElements.Select(c => GetCellDisplayValue(c, evaluator)).ToArray();

            // Prefer the row's declared index; fall back to its position when it is absent.
            var rowRef = row.RowIndex?.Value ?? (uint)lineNum;
            sb.AppendLine($"[/{sheetName}/row[{rowRef}]] {string.Join("\t", cells)}");
            emitted++;
        }
    }
    return sb.ToString().TrimEnd();
}
/// <summary>
/// Renders the workbook cell by cell: each line shows the cell reference, its display value,
/// and either its formula or its data type, plus a warning marker for empty cells and
/// formula errors.
/// </summary>
/// <param name="startLine">1-based position of the first row to emit, counted per sheet over the
/// row elements present in the sheet data. Null means no lower bound.</param>
/// <param name="endLine">1-based position of the last row to emit, counted the same way. Null means no upper bound.</param>
/// <param name="maxLines">Maximum number of rows (not cells) to emit across all sheets. When reached,
/// a truncation notice is appended and no further sheets are rendered.</param>
/// <param name="cols">Optional set of column names; when given, only cells whose reference column
/// is in the set are printed.</param>
/// <returns>The rendered text with trailing whitespace trimmed.</returns>
public string ViewAsAnnotated(int? startLine = null, int? endLine = null, int? maxLines = null, HashSet<string>? cols = null)
{
    var sb = new StringBuilder();
    var sheets = GetWorksheets();
    int emitted = 0;
    bool truncated = false;
    foreach (var (sheetName, worksheetPart) in sheets)
    {
        if (truncated) break;
        sb.AppendLine($"=== Sheet: {sheetName} ===");
        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        int lineNum = 0;
        foreach (var row in sheetData.Elements<Row>())
        {
            lineNum++;
            if (startLine.HasValue && lineNum < startLine.Value) continue;
            if (endLine.HasValue && lineNum > endLine.Value) break;
            if (maxLines.HasValue && emitted >= maxLines.Value)
            {
                // The total is only needed for the notice, so count rows only on truncation.
                int totalRows = sheetData.Elements<Row>().Count();
                sb.AppendLine($"... (showed {emitted} rows, {totalRows} total in sheet, use --start/--end to view more)");
                truncated = true;
                break;
            }

            var cellElements = row.Elements<Cell>();
            if (cols != null)
                cellElements = cellElements.Where(c => cols.Contains(ParseCellReference(c.CellReference?.Value ?? "A1").Column));

            foreach (var cell in cellElements)
            {
                var cellRef = cell.CellReference?.Value ?? "?";
                var value = GetCellDisplayValue(cell);
                var formula = cell.CellFormula?.Text;
                // A cell with no explicit data type is reported as "Number".
                var type = cell.DataType?.InnerText ?? "Number";
                var annotation = formula != null ? $"={formula}" : type;

                var warn = "";
                if (string.IsNullOrEmpty(value) && formula == null)
                    warn = " \u26a0 empty";
                // Same error set as the stats and issues views, including "#DIV/0!".
                else if (formula != null && value is "#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!")
                    warn = " \u26a0 formula error";

                sb.AppendLine($" {cellRef}: [{value}] \u2190 {annotation}{warn}");
            }
            emitted++;
        }
    }
    return sb.ToString().TrimEnd();
}
/// <summary>
/// Produces a one-line-per-sheet outline of the workbook: sheet name, row and column counts,
/// and (when non-zero) the number of formulas, pivot tables and OLE objects.
/// </summary>
/// <returns>
/// The outline text, or "(empty workbook)" / "(no sheets)" when the workbook or its
/// sheet list is missing.
/// </returns>
public string ViewAsOutline()
{
    var sb = new StringBuilder();
    var workbook = _doc.WorkbookPart?.Workbook;
    if (workbook == null) return "(empty workbook)";
    var sheets = workbook.GetFirstChild<Sheets>();
    if (sheets == null) return "(no sheets)";

    sb.AppendLine($"File: {Path.GetFileName(_filePath)}");
    foreach (var sheet in sheets.Elements<Sheet>())
    {
        var name = sheet.Name?.Value ?? "?";
        var sheetId = sheet.Id?.Value;
        // A sheet entry without a relationship id cannot be resolved to a part; skip it.
        if (sheetId == null) continue;

        var worksheetPart = (WorksheetPart)_doc.WorkbookPart!.GetPartById(sheetId);
        var worksheet = GetSheet(worksheetPart);
        var sheetData = worksheet.GetFirstChild<SheetData>();
        int rowCount = sheetData?.Elements<Row>().Count() ?? 0;
        int colCount = GetSheetColumnCount(worksheet, sheetData);

        int formulaCount = sheetData?.Descendants<CellFormula>().Count() ?? 0;
        var formulaInfo = formulaCount > 0 ? $", {formulaCount} formula(s)" : "";

        // Pivot tables are stored as pivotTableDefinition XML; their rendered cells
        // are NOT materialized into sheetData (Excel/Calc re-render from pivotCacheRecords
        // at display time). Without this hint, a pivot-only sheet looks like "0 rows × 0 cols"
        // and users think it's empty. Surface the pivot count explicitly — same strategy POI
        // takes via XSSFSheet.getPivotTables(). See also: query pivottable.
        int pivotCount = worksheetPart.PivotTableParts.Count();
        var pivotInfo = pivotCount > 0 ? $", {pivotCount} pivot table(s)" : "";

        int oleCount = CountSheetOleObjects(worksheetPart);
        var oleInfo = oleCount > 0 ? $", {oleCount} ole object(s)" : "";

        sb.AppendLine($"\u251c\u2500\u2500 \"{name}\" ({rowCount} rows \u00d7 {colCount} cols{formulaInfo}{pivotInfo}{oleInfo})");
    }
    return sb.ToString().TrimEnd();
}
// CONSISTENCY(ole-stats): per-sheet OLE counter shared by outline and
// outlineJson. The workbook-wide stats views apply the same dedup rule —
// referenced oleObject elements count once, orphan embedded/package parts add extras.
/// <summary>
/// Counts the OLE objects on one worksheet.
/// </summary>
/// <param name="worksheetPart">The worksheet part to inspect.</param>
/// <returns>
/// The number of oleObject elements in the sheet, plus the number of embedded-object and
/// embedded-package parts whose relationship id is not referenced by any of those elements.
/// </returns>
private int CountSheetOleObjects(WorksheetPart worksheetPart)
{
    int count = 0;
    // Relationship ids already accounted for by an oleObject element (compared case-insensitively).
    var referenced = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var oleEl in GetSheet(worksheetPart).Descendants<OleObject>())
    {
        count++;
        if (oleEl.Id?.Value is string rid && !string.IsNullOrEmpty(rid))
            referenced.Add(rid);
    }

    // Parts that no oleObject element points at are counted as additional objects.
    count += worksheetPart.EmbeddedObjectParts.Count(p => !referenced.Contains(worksheetPart.GetIdOfPart(p)));
    count += worksheetPart.EmbeddedPackageParts.Count(p => !referenced.Contains(worksheetPart.GetIdOfPart(p)));
    return count;
}
/// <summary>
/// Produces a plain-text statistics summary for the whole workbook: sheet count, total / empty /
/// formula / error cell counts, the OLE object count (only when non-zero), and the distribution
/// of cell data types ordered by descending frequency.
/// </summary>
/// <returns>The summary text with trailing whitespace trimmed.</returns>
public string ViewAsStats()
{
    var sb = new StringBuilder();
    var sheets = GetWorksheets();
    int totalCells = 0;
    int emptyCells = 0;
    int formulaCells = 0;
    int errorCells = 0;
    int oleCount = 0;
    var typeCounts = new Dictionary<string, int>();

    foreach (var (_, worksheetPart) in sheets)
    {
        // OLE objects are counted for every sheet, including sheets without sheet data.
        // CountSheetOleObjects counts each oleObject element once and adds embedded parts
        // that no element references.
        oleCount += CountSheetOleObjects(worksheetPart);

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        foreach (var row in sheetData.Elements<Row>())
        {
            foreach (var cell in row.Elements<Cell>())
            {
                totalCells++;
                var value = GetCellDisplayValue(cell);
                if (string.IsNullOrEmpty(value)) emptyCells++;
                if (cell.CellFormula != null) formulaCells++;
                if (value is "#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!") errorCells++;

                // A cell with no explicit data type is tallied as "Number".
                var type = cell.DataType?.InnerText ?? "Number";
                typeCounts[type] = typeCounts.GetValueOrDefault(type) + 1;
            }
        }
    }

    sb.AppendLine($"Sheets: {sheets.Count}");
    sb.AppendLine($"Total Cells: {totalCells}");
    sb.AppendLine($"Empty Cells: {emptyCells}");
    sb.AppendLine($"Formula Cells: {formulaCells}");
    sb.AppendLine($"Error Cells: {errorCells}");
    if (oleCount > 0) sb.AppendLine($"OLE Objects: {oleCount}");
    sb.AppendLine();
    sb.AppendLine("Data Type Distribution:");
    foreach (var (type, count) in typeCounts.OrderByDescending(kv => kv.Value))
        sb.AppendLine($" {type}: {count}");
    return sb.ToString().TrimEnd();
}
/// <summary>
/// JSON counterpart of the statistics view. Returns an object with the keys
/// "sheets", "totalCells", "emptyCells", "formulaCells", "errorCells", "oleObjects"
/// and "dataTypeDistribution" (data type name → count, ordered by descending count).
/// </summary>
/// <returns>The statistics as a <see cref="JsonObject"/>.</returns>
public JsonNode ViewAsStatsJson()
{
    var sheets = GetWorksheets();
    int totalCells = 0, emptyCells = 0, formulaCells = 0, errorCells = 0;
    int oleCount = 0;
    var typeCounts = new Dictionary<string, int>();

    foreach (var (_, worksheetPart) in sheets)
    {
        // OLE objects are counted for every sheet, including sheets without sheet data.
        // CountSheetOleObjects counts each oleObject element once and adds embedded parts
        // that no element references.
        oleCount += CountSheetOleObjects(worksheetPart);

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        foreach (var row in sheetData.Elements<Row>())
        {
            foreach (var cell in row.Elements<Cell>())
            {
                totalCells++;
                var value = GetCellDisplayValue(cell);
                if (string.IsNullOrEmpty(value)) emptyCells++;
                if (cell.CellFormula != null) formulaCells++;
                if (value is "#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!") errorCells++;

                // A cell with no explicit data type is tallied as "Number".
                var type = cell.DataType?.InnerText ?? "Number";
                typeCounts[type] = typeCounts.GetValueOrDefault(type) + 1;
            }
        }
    }

    var result = new JsonObject
    {
        ["sheets"] = sheets.Count,
        ["totalCells"] = totalCells,
        ["emptyCells"] = emptyCells,
        ["formulaCells"] = formulaCells,
        ["errorCells"] = errorCells,
        ["oleObjects"] = oleCount,
    };

    var types = new JsonObject();
    foreach (var (type, count) in typeCounts.OrderByDescending(kv => kv.Value))
        types[type] = count;
    result["dataTypeDistribution"] = types;
    return result;
}
/// <summary>
/// JSON counterpart of the outline view. Returns an object with "fileName" and a "sheets"
/// array; each sheet entry carries "name", "rows", "cols", "formulas" and "oleObjects".
/// </summary>
/// <returns>
/// The outline as a <see cref="JsonObject"/>. An empty object is returned when the workbook
/// is missing; an object with an empty "sheets" array when the sheet list is missing.
/// </returns>
public JsonNode ViewAsOutlineJson()
{
    var workbook = _doc.WorkbookPart?.Workbook;
    if (workbook == null) return new JsonObject();
    var sheetsEl = workbook.GetFirstChild<Sheets>();
    if (sheetsEl == null) return new JsonObject { ["fileName"] = Path.GetFileName(_filePath), ["sheets"] = new JsonArray() };

    var sheetsArray = new JsonArray();
    foreach (var sheet in sheetsEl.Elements<Sheet>())
    {
        var name = sheet.Name?.Value ?? "?";
        var sheetId = sheet.Id?.Value;
        // A sheet entry without a relationship id cannot be resolved to a part; skip it.
        if (sheetId == null) continue;

        var worksheetPart = (WorksheetPart)_doc.WorkbookPart!.GetPartById(sheetId);
        var worksheet = GetSheet(worksheetPart);
        var sheetData = worksheet.GetFirstChild<SheetData>();
        int rowCount = sheetData?.Elements<Row>().Count() ?? 0;
        int colCount = GetSheetColumnCount(worksheet, sheetData);
        int formulaCount = sheetData?.Descendants<CellFormula>().Count() ?? 0;
        int oleCount = CountSheetOleObjects(worksheetPart);

        var sheetObj = new JsonObject
        {
            ["name"] = name,
            ["rows"] = rowCount,
            ["cols"] = colCount,
            ["formulas"] = formulaCount,
            ["oleObjects"] = oleCount
        };
        sheetsArray.Add((JsonNode)sheetObj);
    }

    return new JsonObject
    {
        ["fileName"] = Path.GetFileName(_filePath),
        ["sheets"] = sheetsArray
    };
}
/// <summary>
/// JSON counterpart of the text view. Returns { "sheets": [ { "name", "rows": [ { "row", "cells" } ] } ] },
/// where "cells" maps each cell reference to its display value.
/// </summary>
/// <param name="startLine">1-based position of the first row to include, counted per sheet over the
/// row elements present in the sheet data. Null means no lower bound.</param>
/// <param name="endLine">1-based position of the last row to include, counted the same way. Null means no upper bound.</param>
/// <param name="maxLines">Maximum number of rows to include across all sheets; once reached,
/// the current sheet is closed and later sheets are not added.</param>
/// <param name="cols">Optional set of column names; when given, only cells whose reference column
/// is in the set are included.</param>
/// <returns>The rows as a <see cref="JsonObject"/>. Sheets without sheet data are omitted.</returns>
public JsonNode ViewAsTextJson(int? startLine = null, int? endLine = null, int? maxLines = null, HashSet<string>? cols = null)
{
    var sheetNodes = new JsonArray();
    int rowsWritten = 0;
    bool hitLimit = false;

    foreach (var (sheetName, worksheetPart) in GetWorksheets())
    {
        if (hitLimit)
        {
            break;
        }

        var data = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (data == null)
        {
            continue;
        }

        var rowNodes = new JsonArray();
        int position = 0;
        foreach (var row in data.Elements<Row>())
        {
            position++;

            // Comparisons against a null int? evaluate to false, so an unset bound never filters.
            if (position < startLine)
            {
                continue;
            }
            if (position > endLine)
            {
                break;
            }
            if (rowsWritten >= maxLines)
            {
                hitLimit = true;
                break;
            }

            var cellMap = new JsonObject();
            foreach (var cell in row.Elements<Cell>())
            {
                // Column filter: a cell without a reference is treated as if it were at "A1".
                if (cols != null && !cols.Contains(ParseCellReference(cell.CellReference?.Value ?? "A1").Column))
                {
                    continue;
                }

                var key = cell.CellReference?.Value ?? "?";
                cellMap[key] = GetCellDisplayValue(cell);
            }

            // Prefer the row's declared index; fall back to its position when it is absent.
            uint rowNumber = row.RowIndex?.Value ?? (uint)position;
            JsonNode rowNode = new JsonObject
            {
                ["row"] = (int)rowNumber,
                ["cells"] = cellMap
            };
            rowNodes.Add(rowNode);
            rowsWritten++;
        }

        JsonNode sheetNode = new JsonObject
        {
            ["name"] = sheetName,
            ["rows"] = rowNodes
        };
        sheetNodes.Add(sheetNode);
    }

    return new JsonObject { ["sheets"] = sheetNodes };
}
/// <summary>
/// Determines the column count reported for a worksheet.
/// </summary>
/// <param name="worksheet">Worksheet whose dimension element is consulted first.</param>
/// <param name="sheetData">The sheet's data element, used as a fallback; may be null.</param>
/// <returns>
/// The index of the last column named in the sheet dimension (e.g. "A1:F20" or a single-cell
/// "A1") when it can be read; otherwise the largest number of cell elements found in any
/// single row, or 0 when there is no sheet data.
/// </returns>
private static int GetSheetColumnCount(Worksheet worksheet, SheetData? sheetData)
{
    // Preferred source: the declared dimension, e.g. <dimension ref="A1:F20"/>.
    var dimension = worksheet.GetFirstChild<SheetDimension>()?.Reference?.Value;
    if (!string.IsNullOrEmpty(dimension))
    {
        var segments = dimension.Split(':');

        // "A1:F20" has two segments and "A1" has one; in both cases the last segment
        // names the right-most cell. Any other shape is ignored.
        if (segments.Length is 1 or 2)
        {
            var letters = string.Concat(segments[^1].TakeWhile(char.IsLetter));
            if (letters.Length > 0)
            {
                return ColumnNameToIndex(letters);
            }
        }
    }

    // Fallback: the widest row, measured as the number of cell elements it contains.
    if (sheetData == null)
    {
        return 0;
    }

    return sheetData.Elements<Row>()
        .Select(r => r.Elements<Cell>().Count())
        .DefaultIfEmpty(0)
        .Max();
}
/// <summary>
/// Collects document issues: formula cells whose display value is an error
/// ("#REF!", "#VALUE!", "#NAME?", "#DIV/0!"), followed by the cell-overflow findings
/// reported by CheckAllCellOverflow.
/// </summary>
/// <param name="issueType">
/// NOTE(review): this parameter is not consulted in this method; filtering by type
/// presumably happens in the caller — confirm.
/// </param>
/// <param name="limit">Maximum number of issues to return; null means unlimited.</param>
/// <returns>The issues found, never more than <paramref name="limit"/> when it is set.</returns>
public List<DocumentIssue> ViewAsIssues(string? issueType = null, int? limit = null)
{
    var issues = new List<DocumentIssue>();
    int issueNum = 0;

    // True once the caller's cap has been reached. It is checked BEFORE every add and at
    // every loop level, so the cap also holds across sheet boundaries and for limit = 0.
    bool LimitReached() => limit.HasValue && issues.Count >= limit.Value;

    foreach (var (sheetName, worksheetPart) in GetWorksheets())
    {
        if (LimitReached()) break;

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        foreach (var row in sheetData.Elements<Row>())
        {
            if (LimitReached()) break;

            foreach (var cell in row.Elements<Cell>())
            {
                if (LimitReached()) break;

                // Only formula cells can produce a formula-error issue.
                var formula = cell.CellFormula;
                if (formula == null) continue;

                var value = GetCellDisplayValue(cell);
                if (value is not ("#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!")) continue;

                var cellRef = cell.CellReference?.Value ?? "?";
                issues.Add(new DocumentIssue
                {
                    Id = $"F{++issueNum}",
                    Type = IssueType.Content,
                    Severity = IssueSeverity.Error,
                    Path = $"{sheetName}!{cellRef}",
                    Message = $"Formula error: {value}",
                    Context = $"={formula.Text}"
                });
            }
        }
    }

    // CONSISTENCY(text-overflow-check): merged in from former `check` command.
    // Emits wrapText-cells whose visible row-height budget can't fit the wrapped text.
    foreach (var (path, msg) in CheckAllCellOverflow())
    {
        if (LimitReached()) break;
        issues.Add(new DocumentIssue
        {
            Id = $"O{++issueNum}",
            Type = IssueType.Format,
            Severity = IssueSeverity.Warning,
            Path = path,
            Message = msg
        });
    }
    return issues;
}
}