2026-03-16 07:26:18 +00:00
|
|
|
|
// Copyright 2025 OfficeCli (officecli.ai)
|
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
using System.Text;
|
2026-03-22 19:47:38 +00:00
|
|
|
|
using System.Text.Json.Nodes;
|
2026-03-16 07:26:18 +00:00
|
|
|
|
using DocumentFormat.OpenXml.Packaging;
|
|
|
|
|
|
using DocumentFormat.OpenXml.Spreadsheet;
|
|
|
|
|
|
using OfficeCli.Core;
|
|
|
|
|
|
|
|
|
|
|
|
namespace OfficeCli.Handlers;
|
|
|
|
|
|
|
|
|
|
|
|
public partial class ExcelHandler
|
|
|
|
|
|
{
|
|
|
|
|
|
/// <summary>
/// Renders every worksheet as plain text: a "=== Sheet: name ===" banner followed by
/// one tab-separated line per row, each prefixed with the row's path.
/// </summary>
/// <param name="startLine">First row position to emit, counted from 1 over the rows present in each sheet; null for no lower bound.</param>
/// <param name="endLine">Last row position to emit (inclusive), same counting as <paramref name="startLine"/>; null for no upper bound.</param>
/// <param name="maxLines">Cap on the number of rows emitted across all sheets combined; null for no cap.</param>
/// <param name="cols">When non-null, only cells whose column is contained in this set are shown.</param>
/// <returns>The rendered text with trailing whitespace removed.</returns>
public string ViewAsText(int? startLine = null, int? endLine = null, int? maxLines = null, HashSet<string>? cols = null)
{
    var output = new StringBuilder();
    var worksheets = GetWorksheets();
    var sheetsRendered = 0;
    var rowsWritten = 0;
    var hitRowCap = false;

    foreach (var (sheetName, worksheetPart) in worksheets)
    {
        if (hitRowCap)
        {
            break;
        }

        output.AppendLine($"=== Sheet: {sheetName} ===");

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null)
        {
            continue;
        }

        var rowsInSheet = sheetData.Elements<Row>().Count();
        var evaluator = new Core.FormulaEvaluator(sheetData, _doc.WorkbookPart);
        var position = 0;

        foreach (var row in sheetData.Elements<Row>())
        {
            position++;

            // Comparisons against a null int? are false, so an unset bound never filters.
            if (position < startLine)
            {
                continue;
            }

            if (position > endLine)
            {
                break;
            }

            if (rowsWritten >= maxLines)
            {
                output.AppendLine($"... (showed {rowsWritten} rows, {rowsInSheet} total in sheet, use --start/--end to view more)");
                hitRowCap = true;
                break;
            }

            IEnumerable<Cell> visibleCells = row.Elements<Cell>();
            if (cols != null)
            {
                visibleCells = visibleCells.Where(c => cols.Contains(ParseCellReference(c.CellReference?.Value ?? "A1").Column));
            }

            var rendered = visibleCells.Select(c => GetCellDisplayValue(c, evaluator)).ToArray();

            // Fall back to the positional counter when the row carries no explicit index.
            var rowRef = row.RowIndex?.Value ?? (uint)position;
            output.AppendLine($"[/{sheetName}/row[{rowRef}]] {string.Join("\t", rendered)}");
            rowsWritten++;
        }

        // Blank separator line between sheets (not after the last one).
        sheetsRendered++;
        if (sheetsRendered < worksheets.Count)
        {
            output.AppendLine();
        }
    }

    return output.ToString().TrimEnd();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Renders every worksheet cell-by-cell, annotating each cell with its formula
/// (or its data type when it has no formula) and a warning marker for empty
/// cells and formula errors.
/// </summary>
/// <param name="startLine">First row position to emit, counted from 1 over the rows present in each sheet; null for no lower bound.</param>
/// <param name="endLine">Last row position to emit (inclusive); null for no upper bound.</param>
/// <param name="maxLines">Cap on the number of rows emitted across all sheets combined; null for no cap.</param>
/// <param name="cols">When non-null, only cells whose column is contained in this set are shown.</param>
/// <returns>The rendered text with trailing whitespace removed.</returns>
public string ViewAsAnnotated(int? startLine = null, int? endLine = null, int? maxLines = null, HashSet<string>? cols = null)
{
    var sb = new StringBuilder();
    var sheets = GetWorksheets();
    int emitted = 0;
    bool truncated = false;

    foreach (var (sheetName, worksheetPart) in sheets)
    {
        if (truncated) break;

        sb.AppendLine($"=== Sheet: {sheetName} ===");
        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        int totalRows = sheetData.Elements<Row>().Count();
        int lineNum = 0;
        foreach (var row in sheetData.Elements<Row>())
        {
            lineNum++;
            if (startLine.HasValue && lineNum < startLine.Value) continue;
            if (endLine.HasValue && lineNum > endLine.Value) break;

            if (maxLines.HasValue && emitted >= maxLines.Value)
            {
                sb.AppendLine($"... (showed {emitted} rows, {totalRows} total in sheet, use --start/--end to view more)");
                truncated = true;
                break;
            }

            var cellElements = row.Elements<Cell>();
            if (cols != null)
                cellElements = cellElements.Where(c => cols.Contains(ParseCellReference(c.CellReference?.Value ?? "A1").Column));

            foreach (var cell in cellElements)
            {
                var cellRef = cell.CellReference?.Value ?? "?";
                var value = GetCellDisplayValue(cell);
                var formula = cell.CellFormula?.Text;
                var type = cell.DataType?.InnerText ?? "Number";

                // A formula cell is annotated with its formula, any other cell with its data type.
                var annotation = formula != null ? $"={formula}" : type;
                var warn = "";

                if (string.IsNullOrEmpty(value) && formula == null)
                    warn = " \u26a0 empty";
                // Same error set as ViewAsStats / ViewAsIssues, so that "#DIV/0!" is
                // flagged here too instead of being reported only by the other views.
                else if (formula != null && value is "#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!")
                    warn = " \u26a0 formula error";

                sb.AppendLine($" {cellRef}: [{value}] \u2190 {annotation}{warn}");
            }

            // The cap counts rows, not individual cells.
            emitted++;
        }
    }

    return sb.ToString().TrimEnd();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds a one-line-per-sheet outline of the workbook: sheet name, row and column
/// counts, plus formula, pivot-table and OLE-object counts when they are non-zero.
/// </summary>
/// <returns>
/// The outline text, or "(empty workbook)" / "(no sheets)" when the workbook or its
/// sheet list is missing.
/// </returns>
public string ViewAsOutline()
{
    var sb = new StringBuilder();
    var workbook = _doc.WorkbookPart?.Workbook;
    if (workbook == null) return "(empty workbook)";

    var sheets = workbook.GetFirstChild<Sheets>();
    if (sheets == null) return "(no sheets)";

    sb.AppendLine($"File: {Path.GetFileName(_filePath)}");

    foreach (var sheet in sheets.Elements<Sheet>())
    {
        var name = sheet.Name?.Value ?? "?";
        var sheetId = sheet.Id?.Value;
        if (sheetId == null) continue;

        // A <sheet> entry can also point at a chart, dialog or macro sheet part.
        // Those have no SheetData, and a hard cast would throw InvalidCastException
        // and abort the whole outline, so only worksheet parts are described here.
        if (_doc.WorkbookPart!.GetPartById(sheetId) is not WorksheetPart worksheetPart) continue;

        var worksheet = GetSheet(worksheetPart);
        var sheetData = worksheet.GetFirstChild<SheetData>();

        int rowCount = sheetData?.Elements<Row>().Count() ?? 0;
        int colCount = GetSheetColumnCount(worksheet, sheetData);

        int formulaCount = sheetData?.Descendants<CellFormula>().Count() ?? 0;
        var formulaInfo = formulaCount > 0 ? $", {formulaCount} formula(s)" : "";

        // Pivot tables are stored as pivotTableDefinition XML; their rendered cells
        // are NOT materialized into sheetData (Excel/Calc re-render from pivotCacheRecords
        // at display time). Without this hint, a pivot-only sheet looks like "0 rows × 0 cols"
        // and users think it's empty. Surface the pivot count explicitly — same strategy POI
        // takes via XSSFSheet.getPivotTables(). See also: query pivottable.
        int pivotCount = worksheetPart.PivotTableParts.Count();
        var pivotInfo = pivotCount > 0 ? $", {pivotCount} pivot table(s)" : "";

        int oleCount = CountSheetOleObjects(worksheetPart);
        var oleInfo = oleCount > 0 ? $", {oleCount} ole object(s)" : "";

        sb.AppendLine($"\u251c\u2500\u2500 \"{name}\" ({rowCount} rows \u00d7 {colCount} cols{formulaInfo}{pivotInfo}{oleInfo})");
    }

    return sb.ToString().TrimEnd();
}
|
|
|
|
|
|
|
feat(ole): comprehensive OLE (Object Linking and Embedding) support across docx/xlsx/pptx
Implement full OLE lifecycle (Add/Query/Get/Set/Remove) with cross-format
consistency. Shared OleHelper provides ProgID detection, part cleanup on
failure, self-embed handling, and input validation.
Highlights:
- Word: VML shape + o:OLEObject with deduped shapetype, header/footer OLE,
v:shape@alt for name, body scoped query with attr filter, SDT support
- Excel: modern oleObject + objectPr/ObjectAnchor with sub-cell EMU precision
for unit-qualified width/height round-trip, VML placeholder shape
- PowerPoint: GraphicFrame with p:oleObj, slide-scoped query (both CSS and
path styles), full set vocabulary incl. name/display/width/height
- Canonical Format schema: progId, contentType, fileSize, width, height,
display, name, relId (cross-format consistent)
- get --save <path> extracts embedded binary payload with savedTo/savedBytes
- raw <sheet>/<relId> fallback for inspecting OLE embedded parts
- view outline/stats/text report OLE counts and placeholders uniformly
- Query attribute filter supports [progId=], [fileSize>], [progId~=], etc.
- Resident mode: supports add/set/remove/swap/get --save for OLE
- Help: docx/xlsx/pptx add help documents OLE type + properties
- Aliases: ole, oleobject, object, embed (add + query, case-insensitive)
Error handling:
- ProgID validation (≤39 chars, no leading digit, valid COM chars)
- Empty source file emits stderr warning
- Locked source file surfaces friendly "close resident first" hint
- Negative width/height rejected on set
- Excel display/name explicitly rejected (schema unsupported)
- Invalid display value rejected with clear error
2026-04-11 10:51:21 +00:00
|
|
|
|
// CONSISTENCY(ole-stats): single per-sheet OLE counter used by outline and
// outlineJson, applying the same dedup rule as ViewAsStats. Every oleObject
// element counts once; embedded object/package parts are added only when no
// oleObject element references their relationship id (orphans).
private int CountSheetOleObjects(WorksheetPart worksheetPart)
{
    // Relationship ids claimed by oleObject elements (compared case-insensitively).
    var claimedRelIds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var elementCount = 0;

    foreach (var oleElement in GetSheet(worksheetPart).Descendants<OleObject>())
    {
        elementCount++;

        var relId = oleElement.Id?.Value;
        if (!string.IsNullOrEmpty(relId))
        {
            claimedRelIds.Add(relId);
        }
    }

    // A part is an orphan when its relationship id was not claimed above.
    bool IsOrphan(OpenXmlPart part) => !claimedRelIds.Contains(worksheetPart.GetIdOfPart(part));

    var orphanObjectParts = worksheetPart.EmbeddedObjectParts.Count(IsOrphan);
    var orphanPackageParts = worksheetPart.EmbeddedPackageParts.Count(IsOrphan);

    return elementCount + orphanObjectParts + orphanPackageParts;
}
|
|
|
|
|
|
|
2026-03-16 07:26:18 +00:00
|
|
|
|
/// <summary>
/// Produces a plain-text statistics report for the workbook: sheet count, total /
/// empty / formula / error cell counts, the OLE object count (only when non-zero)
/// and the distribution of cell data types.
/// </summary>
/// <returns>The report text with trailing whitespace removed.</returns>
public string ViewAsStats()
{
    var sb = new StringBuilder();
    var sheets = GetWorksheets();
    int totalCells = 0;
    int emptyCells = 0;
    int formulaCells = 0;
    int errorCells = 0;
    int oleCount = 0;
    var typeCounts = new Dictionary<string, int>();

    foreach (var (_, worksheetPart) in sheets)
    {
        // OLE objects are counted for every sheet, including sheets without SheetData.
        // Uses the shared per-sheet counter so the dedup rule (referenced parts count
        // once via their oleObject element, orphan parts add extras) lives in one place.
        oleCount += CountSheetOleObjects(worksheetPart);

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        foreach (var row in sheetData.Elements<Row>())
        {
            foreach (var cell in row.Elements<Cell>())
            {
                totalCells++;
                var value = GetCellDisplayValue(cell);
                if (string.IsNullOrEmpty(value)) emptyCells++;
                if (cell.CellFormula != null) formulaCells++;
                if (value is "#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!") errorCells++;

                // Cells without an explicit data type are tallied as "Number".
                var type = cell.DataType?.InnerText ?? "Number";
                typeCounts[type] = typeCounts.GetValueOrDefault(type) + 1;
            }
        }
    }

    sb.AppendLine($"Sheets: {sheets.Count}");
    sb.AppendLine($"Total Cells: {totalCells}");
    sb.AppendLine($"Empty Cells: {emptyCells}");
    sb.AppendLine($"Formula Cells: {formulaCells}");
    sb.AppendLine($"Error Cells: {errorCells}");
    if (oleCount > 0) sb.AppendLine($"OLE Objects: {oleCount}");
    sb.AppendLine();
    sb.AppendLine("Data Type Distribution:");
    foreach (var (type, count) in typeCounts.OrderByDescending(kv => kv.Value))
        sb.AppendLine($" {type}: {count}");

    return sb.ToString().TrimEnd();
}
|
|
|
|
|
|
|
2026-03-22 19:47:38 +00:00
|
|
|
|
/// <summary>
/// Produces the workbook statistics as JSON: sheet count, total / empty / formula /
/// error cell counts, the OLE object count and the distribution of cell data types.
/// </summary>
/// <returns>
/// A JSON object with the keys "sheets", "totalCells", "emptyCells", "formulaCells",
/// "errorCells", "oleObjects" and "dataTypeDistribution".
/// </returns>
public JsonNode ViewAsStatsJson()
{
    var sheets = GetWorksheets();
    int totalCells = 0, emptyCells = 0, formulaCells = 0, errorCells = 0, oleCount = 0;
    var typeCounts = new Dictionary<string, int>();

    foreach (var (_, worksheetPart) in sheets)
    {
        // OLE objects are counted for every sheet, including sheets without SheetData.
        // Uses the shared per-sheet counter instead of repeating the dedup logic here.
        oleCount += CountSheetOleObjects(worksheetPart);

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        foreach (var row in sheetData.Elements<Row>())
        {
            foreach (var cell in row.Elements<Cell>())
            {
                totalCells++;
                var value = GetCellDisplayValue(cell);
                if (string.IsNullOrEmpty(value)) emptyCells++;
                if (cell.CellFormula != null) formulaCells++;
                if (value is "#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!") errorCells++;

                // Cells without an explicit data type are tallied as "Number".
                var type = cell.DataType?.InnerText ?? "Number";
                typeCounts[type] = typeCounts.GetValueOrDefault(type) + 1;
            }
        }
    }

    var result = new JsonObject
    {
        ["sheets"] = sheets.Count,
        ["totalCells"] = totalCells,
        ["emptyCells"] = emptyCells,
        ["formulaCells"] = formulaCells,
        ["errorCells"] = errorCells,
        ["oleObjects"] = oleCount,
    };

    // Most frequent data type first.
    var types = new JsonObject();
    foreach (var (type, count) in typeCounts.OrderByDescending(kv => kv.Value))
        types[type] = count;
    result["dataTypeDistribution"] = types;

    return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds the workbook outline as JSON: the file name plus one entry per worksheet
/// carrying its name and its row, column, formula, pivot-table and OLE-object counts.
/// </summary>
/// <returns>
/// An empty JSON object when the workbook is missing; otherwise an object with
/// "fileName" and a "sheets" array (empty when the workbook has no sheet list).
/// </returns>
public JsonNode ViewAsOutlineJson()
{
    var workbook = _doc.WorkbookPart?.Workbook;
    if (workbook == null) return new JsonObject();

    var sheetsEl = workbook.GetFirstChild<Sheets>();
    if (sheetsEl == null) return new JsonObject { ["fileName"] = Path.GetFileName(_filePath), ["sheets"] = new JsonArray() };

    var sheetsArray = new JsonArray();
    foreach (var sheet in sheetsEl.Elements<Sheet>())
    {
        var name = sheet.Name?.Value ?? "?";
        var sheetId = sheet.Id?.Value;
        if (sheetId == null) continue;

        // A <sheet> entry can also point at a chart, dialog or macro sheet part.
        // A hard cast would throw InvalidCastException and abort the whole outline,
        // so only worksheet parts are described here.
        if (_doc.WorkbookPart!.GetPartById(sheetId) is not WorksheetPart worksheetPart) continue;

        var worksheet = GetSheet(worksheetPart);
        var sheetData = worksheet.GetFirstChild<SheetData>();
        int rowCount = sheetData?.Elements<Row>().Count() ?? 0;
        int colCount = GetSheetColumnCount(worksheet, sheetData);
        int formulaCount = sheetData?.Descendants<CellFormula>().Count() ?? 0;

        // Pivot tables are not materialized into sheetData, so a pivot-only sheet
        // would otherwise look empty (0 rows, 0 cols). The text outline already
        // reports this count; expose it in JSON as well.
        int pivotCount = worksheetPart.PivotTableParts.Count();

        int oleCount = CountSheetOleObjects(worksheetPart);

        var sheetObj = new JsonObject
        {
            ["name"] = name,
            ["rows"] = rowCount,
            ["cols"] = colCount,
            ["formulas"] = formulaCount,
            ["pivotTables"] = pivotCount,
            ["oleObjects"] = oleCount
        };
        sheetsArray.Add((JsonNode)sheetObj);
    }

    return new JsonObject
    {
        ["fileName"] = Path.GetFileName(_filePath),
        ["sheets"] = sheetsArray
    };
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Renders the workbook's cell values as JSON: a "sheets" array whose entries hold
/// the sheet name and a "rows" array of { "row", "cells" } objects, where "cells"
/// maps each cell reference to its display value.
/// </summary>
/// <param name="startLine">First row position to emit, counted from 1 over the rows present in each sheet; null for no lower bound.</param>
/// <param name="endLine">Last row position to emit (inclusive); null for no upper bound.</param>
/// <param name="maxLines">Cap on the number of rows emitted across all sheets combined; null for no cap.</param>
/// <param name="cols">When non-null, only cells whose column is contained in this set are included.</param>
/// <returns>A JSON object of the form { "sheets": [...] }.</returns>
public JsonNode ViewAsTextJson(int? startLine = null, int? endLine = null, int? maxLines = null, HashSet<string>? cols = null)
{
    var sheetNodes = new JsonArray();
    var rowsWritten = 0;
    var hitRowCap = false;

    foreach (var (sheetName, worksheetPart) in GetWorksheets())
    {
        if (hitRowCap)
        {
            break;
        }

        // Sheets without SheetData are left out of the result entirely.
        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null)
        {
            continue;
        }

        var rowNodes = new JsonArray();
        var position = 0;

        foreach (var row in sheetData.Elements<Row>())
        {
            position++;

            // Comparisons against a null int? are false, so an unset bound never filters.
            if (position < startLine)
            {
                continue;
            }

            if (position > endLine)
            {
                break;
            }

            if (rowsWritten >= maxLines)
            {
                hitRowCap = true;
                break;
            }

            IEnumerable<Cell> visibleCells = row.Elements<Cell>();
            if (cols != null)
            {
                visibleCells = visibleCells.Where(c => cols.Contains(ParseCellReference(c.CellReference?.Value ?? "A1").Column));
            }

            var cellMap = new JsonObject();
            foreach (var cell in visibleCells)
            {
                var key = cell.CellReference?.Value ?? "?";
                cellMap[key] = GetCellDisplayValue(cell);
            }

            // Fall back to the positional counter when the row carries no explicit index.
            var rowRef = row.RowIndex?.Value ?? (uint)position;
            JsonNode rowNode = new JsonObject
            {
                ["row"] = (int)rowRef,
                ["cells"] = cellMap
            };
            rowNodes.Add(rowNode);
            rowsWritten++;
        }

        // A sheet cut short by the row cap is still reported with the rows gathered so far.
        JsonNode sheetNode = new JsonObject
        {
            ["name"] = sheetName,
            ["rows"] = rowNodes
        };
        sheetNodes.Add(sheetNode);
    }

    return new JsonObject { ["sheets"] = sheetNodes };
}
|
|
|
|
|
|
|
2026-03-29 07:27:06 +00:00
|
|
|
|
/// <summary>
/// Determines how many columns a worksheet spans.
/// </summary>
/// <param name="worksheet">Worksheet whose dimension element is consulted first.</param>
/// <param name="sheetData">The worksheet's cell data, used as a fallback; may be null.</param>
/// <returns>
/// The index of the right-most column named by the sheet dimension when one is
/// present and parseable; otherwise the right-most column found by scanning the
/// cells; 0 when there is no cell data.
/// </returns>
private static int GetSheetColumnCount(Worksheet worksheet, SheetData? sheetData)
{
    // Try SheetDimension first (e.g., <dimension ref="A1:F20"/>).
    var dimRef = worksheet.GetFirstChild<SheetDimension>()?.Reference?.Value;
    if (!string.IsNullOrEmpty(dimRef))
    {
        var parts = dimRef.Split(':');

        // "A1:F20" -> end reference "F20"; a single-cell dimension like "A1" is its own end.
        if (parts.Length is 1 or 2)
        {
            var lastColumn = LeadingLetters(parts[^1]);
            if (lastColumn.Length > 0)
                return ColumnNameToIndex(lastColumn);
        }
    }

    // Fallback: scan all rows for the right-most populated column.
    if (sheetData == null) return 0;

    int maxCols = 0;
    foreach (var row in sheetData.Elements<Row>())
    {
        int cellsInRow = 0;
        foreach (var cell in row.Elements<Cell>())
        {
            cellsInRow++;

            // Use the column from the cell reference so that sparse rows (e.g. only
            // "F1" populated) report the same extent the dimension path would.
            var column = LeadingLetters(cell.CellReference?.Value);
            if (column.Length > 0)
            {
                int columnIndex = ColumnNameToIndex(column);
                if (columnIndex > maxCols) maxCols = columnIndex;
            }
        }

        // Cells without a reference still count positionally, so the result never
        // drops below the plain per-row cell count.
        if (cellsInRow > maxCols) maxCols = cellsInRow;
    }
    return maxCols;

    // Extracts the leading letters of a cell reference ("AB12" -> "AB").
    static string LeadingLetters(string? reference) =>
        new string((reference ?? string.Empty).TakeWhile(char.IsLetter).ToArray());
}
|
|
|
|
|
|
|
2026-03-16 07:26:18 +00:00
|
|
|
|
/// <summary>
/// Collects problems found in the workbook: formula cells whose display value is an
/// error ("#REF!", "#VALUE!", "#NAME?", "#DIV/0!") and the cell text-overflow
/// findings reported by <c>CheckAllCellOverflow</c>.
/// </summary>
/// <param name="issueType">
/// NOTE(review): this parameter is not consulted anywhere in this method —
/// presumably filtering by type happens in the caller; confirm.
/// </param>
/// <param name="limit">Maximum number of issues to return; null for no limit.</param>
/// <returns>Formula-error issues first (ids "F…"), then overflow issues (ids "O…"); both share one running counter.</returns>
public List<DocumentIssue> ViewAsIssues(string? issueType = null, int? limit = null)
{
    var issues = new List<DocumentIssue>();
    int issueNum = 0;

    // Checked BEFORE every add and at sheet level, so the result can never exceed
    // the limit (previously each additional sheet could add one issue past it,
    // and a limit of 0 still produced an issue).
    bool LimitReached() => limit.HasValue && issues.Count >= limit.Value;

    var sheets = GetWorksheets();
    foreach (var (sheetName, worksheetPart) in sheets)
    {
        if (LimitReached()) break;

        var sheetData = GetSheet(worksheetPart).GetFirstChild<SheetData>();
        if (sheetData == null) continue;

        foreach (var cell in sheetData.Elements<Row>().SelectMany(row => row.Elements<Cell>()))
        {
            if (LimitReached()) break;

            // Only formula cells can produce a formula error.
            if (cell.CellFormula is not { } formula) continue;

            var value = GetCellDisplayValue(cell);
            if (value is not ("#REF!" or "#VALUE!" or "#NAME?" or "#DIV/0!")) continue;

            var cellRef = cell.CellReference?.Value ?? "?";
            issues.Add(new DocumentIssue
            {
                Id = $"F{++issueNum}",
                Type = IssueType.Content,
                Severity = IssueSeverity.Error,
                Path = $"{sheetName}!{cellRef}",
                Message = $"Formula error: {value}",
                Context = $"={formula.Text}"
            });
        }
    }

    // CONSISTENCY(text-overflow-check): merged in from former `check` command.
    // Emits wrapText-cells whose visible row-height budget can't fit the wrapped text.
    foreach (var (path, msg) in CheckAllCellOverflow())
    {
        if (LimitReached()) break;
        issues.Add(new DocumentIssue
        {
            Id = $"O{++issueNum}",
            Type = IssueType.Format,
            Severity = IssueSeverity.Warning,
            Path = path,
            Message = msg
        });
    }

    return issues;
}
|
|
|
|
|
|
}
|