Merge pull request #50 from konbakuyomu/feat/word-ole-image-improvements

feat(word): OLE object support, image auto aspect ratio, and insertion positioning fixes
This commit is contained in:
goworm 2026-04-10 13:30:44 +08:00 committed by GitHub
commit 3777b08834
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 1141 additions and 38 deletions

View file

@ -340,6 +340,8 @@ officecli validate doc.docx
| Page number on cover | Adding `--type footer --prop type=first` automatically enables differentFirstPage. Do NOT use `set / --prop differentFirstPage=true` — that prop is UNSUPPORTED and silently fails |
| TOC skipped for multi-heading docs | Any document with 3+ headings requires a TOC. It is not optional — add with `--type toc --index 0` after the cover page break |
| Code block indentation via spaces | Use the `ind.left` paragraph property (e.g. `--prop ind.left=720`) for code block indentation — consecutive spaces as padding produce `view issues` warnings and visually inconsistent results |
| `--type paragraph --prop "image=..."` | Wrong syntax — creates empty paragraph. Use `--type picture --prop "path=file.png" --prop "width=12cm" --prop "height=12.5cm"`. Both width AND height required (omitting height defaults to 4in) |
| Image shows as thin sliver | The image paragraph inherited fixed line spacing from the Normal style. Set `--prop lineSpacing=1x` on the image paragraph, or use the patched version, which does this automatically |
---
# officecli: v1.0.23
@ -396,6 +398,8 @@ Batch fields: `command`, `path`, `parent`, `type`, `from`, `to`, `index`, `after
| **`\mathcal` in equations causes validation errors** | The `\mathcal` LaTeX command generates invalid `m:scr` XML. Use `\mathit` or plain letters instead. |
| **`view text` shows "1." for all numbered items** | Display-only limitation. Rendered output in Word/LibreOffice shows correct auto-incrementing numbers. |
| **`chartType=pie`/`doughnut` in LibreOffice PDF** | **Do NOT use `chartType=pie` or `chartType=doughnut` when LibreOffice PDF delivery is required.** These chart types render without visible slices in LibreOffice PDF export — only labels and legend appear, slices are invisible. Use `chartType=column` or `chartType=bar` instead. Charts render correctly in Microsoft Word only. |
| **`--after`/`--before` offset when document has tables** *(fixed in fork)* | When the document body contains `<w:tbl>` or `<w:sectPr>` elements, `--after`/`--before`/`--index` positioning shifts by 1 per non-paragraph element. Root cause: `ResolveAnchorPosition` computes the index against `ChildElements` (all types), but `AddPicture`/`AddParagraph` look it up against `Elements<Paragraph>()` (paragraphs only). **Fix applied**: both methods now use `ChildElements` for index lookup. If using an unpatched version, verify the insertion position with `view annotated` after each insert. |
| **Inserted images clipped to one line height** *(fixed in fork)* | `add --type picture` creates a bare `<w:p>` with no `<w:pPr>`. If the document's Normal style has fixed line spacing ("Exactly Npt"), the image is clipped to that height — e.g., a 12cm image shows as a 1cm sliver. **Fix applied**: `AddPicture` now auto-injects `<w:spacing w:line="240" w:lineRule="auto"/>`. If using an unpatched version, run `set "/body/p[@paraId=XXX]" --prop lineSpacing=1x` on each image paragraph after insertion. |
---
# officecli: v1.0.23

View file

@ -12,6 +12,7 @@ internal static class HtmlPreviewHelper
{
/// <summary>
/// Load an OpenXML part by its relationship ID and return the content as a base64 data URI.
/// EMF/WMF images are automatically converted to PNG for browser compatibility.
/// Returns null if the part cannot be found or read.
/// </summary>
public static string? PartToDataUri(OpenXmlPart parentPart, string relId)
@ -23,6 +24,15 @@ internal static class HtmlPreviewHelper
using var ms = new MemoryStream();
stream.CopyTo(ms);
var contentType = part.ContentType ?? "image/png";
// Convert EMF/WMF to PNG for browser rendering
if (contentType.Contains("emf", StringComparison.OrdinalIgnoreCase) ||
contentType.Contains("wmf", StringComparison.OrdinalIgnoreCase))
{
var pngUri = ConvertMetafileToPngDataUri(ms.ToArray());
if (pngUri != null) return pngUri;
}
return $"data:{contentType};base64,{Convert.ToBase64String(ms.ToArray())}";
}
catch
@ -30,4 +40,44 @@ internal static class HtmlPreviewHelper
return null;
}
}
/// <summary>
/// Convert EMF/WMF metafile bytes to a PNG data URI using System.Drawing (Windows GDI+).
/// </summary>
/// <param name="metafileBytes">Raw bytes of the EMF/WMF image part.</param>
/// <returns>
/// A <c>data:image/png;base64,...</c> URI, or null when the platform is not Windows,
/// the metafile reports a non-positive size, or conversion fails for any reason.
/// </returns>
private static string? ConvertMetafileToPngDataUri(byte[] metafileBytes)
{
    // System.Drawing metafile rendering relies on GDI+; bail out up front on other
    // platforms instead of paying for a thrown-and-swallowed exception per image.
    if (!OperatingSystem.IsWindowsVersionAtLeast(6, 1)) return null;
    if (metafileBytes is null || metafileBytes.Length == 0) return null;

    try
    {
        using var emfStream = new MemoryStream(metafileBytes);
        using var metafile = new System.Drawing.Imaging.Metafile(emfStream);

        int width = metafile.Width;
        int height = metafile.Height;
        if (width <= 0 || height <= 0) return null;

        // Scale up for readability: use 2x for small images
        int scale = (width < 400 || height < 300) ? 2 : 1;
        int bmpW = width * scale;
        int bmpH = height * scale;

        using var bitmap = new System.Drawing.Bitmap(bmpW, bmpH);
        bitmap.SetResolution(96, 96);
        using (var g = System.Drawing.Graphics.FromImage(bitmap))
        {
            // Metafiles are frequently transparent; paint a white page behind them.
            g.Clear(System.Drawing.Color.White);
            g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
            g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
            g.DrawImage(metafile, 0, 0, bmpW, bmpH);
        }

        using var pngStream = new MemoryStream();
        bitmap.Save(pngStream, System.Drawing.Imaging.ImageFormat.Png);
        return $"data:image/png;base64,{Convert.ToBase64String(pngStream.ToArray())}";
    }
    catch
    {
        // Best-effort conversion: the caller falls back to the original image bytes.
        return null;
    }
}
}

View file

@ -168,4 +168,96 @@ internal static class ImageSource
return false;
}
/// <summary>
/// Try to read pixel dimensions from an image stream by parsing file headers.
/// Cross-platform: supports PNG, JPEG, GIF, BMP without System.Drawing.
/// The stream position is restored (best effort) before returning.
/// </summary>
/// <param name="stream">Image stream; the header is always read from offset 0.</param>
/// <returns>
/// The (Width, Height) in pixels, or null when the stream is null, not seekable,
/// shorter than 24 bytes, of an unrecognized format, or reports non-positive dimensions.
/// </returns>
public static (int Width, int Height)? TryGetDimensions(Stream stream)
{
    if (stream is null || !stream.CanSeek || stream.Length < 24) return null;
    var startPos = stream.Position;
    try
    {
        var header = new byte[30];
        stream.Position = 0;
        // A single Read call may legally return fewer bytes than requested.
        int read = ReadAvailable(stream, header, header.Length);
        if (read < 24) return null;

        // PNG: signature 89 50 4E 47, IHDR width/height at offset 16/20 (big-endian)
        if (header[0] == 0x89 && header[1] == 0x50 && header[2] == 0x4E && header[3] == 0x47)
        {
            // Offsets 16/20 only hold the dimensions when the first chunk is IHDR.
            if (header[12] != 0x49 || header[13] != 0x48 || header[14] != 0x44 || header[15] != 0x52)
                return null;
            int w = (header[16] << 24) | (header[17] << 16) | (header[18] << 8) | header[19];
            int h = (header[20] << 24) | (header[21] << 16) | (header[22] << 8) | header[23];
            return (w > 0 && h > 0) ? (w, h) : null;
        }

        // BMP: signature 42 4D, DIB header size at offset 14 (uint32 LE)
        if (header[0] == 0x42 && header[1] == 0x4D && read >= 26)
        {
            int dibSize = header[14] | (header[15] << 8) | (header[16] << 16) | (header[17] << 24);
            int w, h;
            if (dibSize == 12)
            {
                // BITMAPCOREHEADER: 16-bit width at offset 18, 16-bit height at offset 20
                w = header[18] | (header[19] << 8);
                h = header[20] | (header[21] << 8);
            }
            else
            {
                // BITMAPINFOHEADER and later: int32 LE width at 18, int32 LE height at 22
                w = header[18] | (header[19] << 8) | (header[20] << 16) | (header[21] << 24);
                h = header[22] | (header[23] << 8) | (header[24] << 16) | (header[25] << 24);
                if (h < 0) h = -h; // BMP can have negative height (top-down)
            }
            return (w > 0 && h > 0) ? (w, h) : null;
        }

        // GIF: signature 47 49 46 38, width at offset 6 (uint16 LE), height at offset 8 (uint16 LE)
        if (header[0] == 0x47 && header[1] == 0x49 && header[2] == 0x46 && header[3] == 0x38)
        {
            int w = header[6] | (header[7] << 8);
            int h = header[8] | (header[9] << 8);
            return (w > 0 && h > 0) ? (w, h) : null;
        }

        // JPEG: signature FF D8, need to find SOFn marker for dimensions
        if (header[0] == 0xFF && header[1] == 0xD8)
            return TryGetJpegDimensions(stream);

        return null;
    }
    catch
    {
        // "Try" contract: any I/O or parsing failure means "dimensions unknown".
        return null;
    }
    finally
    {
        // Restoring the position is best effort; the stream may have become unusable.
        try { stream.Position = startPos; } catch { }
    }
}

/// <summary>
/// Read up to <paramref name="count"/> bytes into <paramref name="buffer"/>, looping
/// until the buffer is filled or the stream ends. Returns the number of bytes read.
/// </summary>
private static int ReadAvailable(Stream stream, byte[] buffer, int count)
{
    int total = 0;
    while (total < count)
    {
        int n = stream.Read(buffer, total, count - total);
        if (n <= 0) break;
        total += n;
    }
    return total;
}

/// <summary>
/// Walk the JPEG marker segments after SOI until a start-of-frame (SOFn) marker is
/// found and return the frame dimensions stored in it. Returns null when no SOFn
/// precedes the image data or the segment structure is malformed.
/// The caller is responsible for restoring the stream position.
/// </summary>
private static (int Width, int Height)? TryGetJpegDimensions(Stream stream)
{
    stream.Position = 2; // skip SOI marker (FF D8)
    var buf = new byte[7];
    while (stream.Position < stream.Length - 2)
    {
        int b1 = stream.ReadByte();
        if (b1 != 0xFF) return null;

        // Skip optional FF fill bytes preceding the marker code
        int b2;
        do { b2 = stream.ReadByte(); } while (b2 == 0xFF && stream.Position < stream.Length);
        if (b2 < 0) return null;

        // Standalone markers (TEM, RST0-RST7, SOI) carry no length field — reading one
        // here would desynchronize the scan, so just move on to the next marker.
        if (b2 == 0x01 || (b2 >= 0xD0 && b2 <= 0xD8)) continue;

        // SOFn markers: C0-C3, C5-C7, C9-CB, CD-CF
        if ((b2 >= 0xC0 && b2 <= 0xC3) || (b2 >= 0xC5 && b2 <= 0xC7) ||
            (b2 >= 0xC9 && b2 <= 0xCB) || (b2 >= 0xCD && b2 <= 0xCF))
        {
            // Segment layout: length(2) precision(1) height(2) width(2)
            if (ReadAvailable(stream, buf, 7) < 7) return null;
            int h = (buf[3] << 8) | buf[4];
            int w = (buf[5] << 8) | buf[6];
            return (w > 0 && h > 0) ? (w, h) : null;
        }

        // SOS (DA) — image data starts; EOI (D9) — end of image. No more metadata.
        if (b2 == 0xDA || b2 == 0xD9) return null;

        // Skip this marker's data segment (length includes its own two bytes)
        if (ReadAvailable(stream, buf, 2) < 2) return null;
        int len = (buf[0] << 8) | buf[1];
        if (len < 2) return null;
        stream.Position += len - 2;
    }
    return null;
}
}

View file

@ -153,13 +153,27 @@ public partial class WordHandler
imagePart.FeedData(imgStream);
var relId = mainPart.GetIdOfPart(imagePart);
// Determine dimensions (default: 6 inches wide, auto height)
long cxEmu = 5486400; // 6 inches in EMUs (914400 * 6)
long cyEmu = 3657600; // 4 inches default
if (properties.TryGetValue("width", out var widthStr))
cxEmu = ParseEmu(widthStr);
if (properties.TryGetValue("height", out var heightStr))
cyEmu = ParseEmu(heightStr);
// Determine dimensions with auto aspect ratio
bool hasExplicitWidth = properties.TryGetValue("width", out var widthStr);
bool hasExplicitHeight = properties.TryGetValue("height", out var heightStr);
long cxEmu = hasExplicitWidth ? ParseEmu(widthStr!) : 5486400; // default: 6 inches
long cyEmu = hasExplicitHeight ? ParseEmu(heightStr!) : 3657600; // default: 4 inches
// Auto-calculate missing dimension from image pixel aspect ratio
if (!hasExplicitWidth || !hasExplicitHeight)
{
var dims = OfficeCli.Core.ImageSource.TryGetDimensions(imgStream);
if (dims is { Width: > 0, Height: > 0 })
{
var (pixW, pixH) = dims.Value;
if (hasExplicitWidth)
cyEmu = (long)(cxEmu * (double)pixH / pixW);
else if (hasExplicitHeight)
cxEmu = (long)(cyEmu * (double)pixW / pixH);
else
cyEmu = (long)(cxEmu * (double)pixH / pixW);
}
}
var altText = properties.GetValueOrDefault("alt", Path.GetFileName(imgPath));
@ -188,10 +202,19 @@ public partial class WordHandler
Paragraph imgPara;
if (parent is Paragraph existingPara)
{
existingPara.AppendChild(imgRun);
var runCount = existingPara.Elements<Run>().Count();
if (index.HasValue && index.Value < runCount)
{
var refRun = existingPara.Elements<Run>().ElementAt(index.Value);
existingPara.InsertBefore(imgRun, refRun);
}
else
{
existingPara.AppendChild(imgRun);
}
imgPara = existingPara;
var imgRunCount = existingPara.Elements<Run>().Count();
resultPath = $"{parentPath}/r[{imgRunCount}]";
var imgRunIdx = existingPara.Elements<Run>().ToList().IndexOf(imgRun) + 1;
resultPath = $"{parentPath}/r[{imgRunIdx}]";
}
else if (parent is TableCell imgCell)
{
@ -206,6 +229,8 @@ public partial class WordHandler
{
imgPara = new Paragraph(imgRun);
AssignParaId(imgPara);
imgPara.PrependChild(new ParagraphProperties(
new SpacingBetweenLines { Line = "240", LineRule = LineSpacingRuleValues.Auto }));
imgCell.AppendChild(imgPara);
}
var imgPIdx = imgCell.Elements<Paragraph>().ToList().IndexOf(imgPara) + 1;
@ -215,17 +240,24 @@ public partial class WordHandler
{
imgPara = new Paragraph(imgRun);
AssignParaId(imgPara);
var imgParaCount = parent.Elements<Paragraph>().Count();
if (index.HasValue && index.Value < imgParaCount)
// Prevent fixed line spacing (inherited from Normal style) from clipping the image
imgPara.PrependChild(new ParagraphProperties(
new SpacingBetweenLines { Line = "240", LineRule = LineSpacingRuleValues.Auto }));
// Use ChildElements for index lookup to match ResolveAnchorPosition
// which computes indices against ChildElements (not just Paragraphs)
var allChildren = parent.ChildElements.ToList();
if (index.HasValue && index.Value < allChildren.Count)
{
var refPara = parent.Elements<Paragraph>().ElementAt(index.Value);
parent.InsertBefore(imgPara, refPara);
resultPath = $"{parentPath}/{BuildParaPathSegment(imgPara, index.Value + 1)}";
var refElement = allChildren[index.Value];
parent.InsertBefore(imgPara, refElement);
var imgPIdx = parent.Elements<Paragraph>().ToList().IndexOf(imgPara) + 1;
resultPath = $"{parentPath}/{BuildParaPathSegment(imgPara, imgPIdx)}";
}
else
{
AppendToParent(parent, imgPara);
resultPath = $"{parentPath}/{BuildParaPathSegment(imgPara, imgParaCount + 1)}";
var imgPIdx = parent.Elements<Paragraph>().Count();
resultPath = $"{parentPath}/{BuildParaPathSegment(imgPara, imgPIdx)}";
}
}
return resultPath;

View file

@ -235,17 +235,21 @@ public partial class WordHandler
para.AppendChild(run);
}
var paraCount = parent.Elements<Paragraph>().Count();
if (index.HasValue && index.Value < paraCount)
// Use ChildElements for index lookup to match ResolveAnchorPosition
// which computes indices against ChildElements (not just Paragraphs)
var allChildren = parent.ChildElements.ToList();
if (index.HasValue && index.Value < allChildren.Count)
{
var refElement = parent.Elements<Paragraph>().ElementAt(index.Value);
var refElement = allChildren[index.Value];
parent.InsertBefore(para, refElement);
resultPath = $"{parentPath}/{BuildParaPathSegment(para, index.Value + 1)}";
var paraPosIdx = parent.Elements<Paragraph>().ToList().IndexOf(para) + 1;
resultPath = $"{parentPath}/{BuildParaPathSegment(para, paraPosIdx)}";
}
else
{
AppendToParent(parent, para);
resultPath = $"{parentPath}/{BuildParaPathSegment(para, paraCount + 1)}";
var paraCount = parent.Elements<Paragraph>().Count();
resultPath = $"{parentPath}/{BuildParaPathSegment(para, paraCount)}";
}
return resultPath;
}

View file

@ -998,13 +998,6 @@ public partial class WordHandler
InsertPosition? position,
Dictionary<string, string> properties)
{
// Parent must be a paragraph (or we navigate to one)
Paragraph para;
if (parent is Paragraph p)
para = p;
else
throw new ArgumentException("after=\"find:...\" / before=\"find:...\" requires a paragraph parent path (e.g. /body/p[1]), not a section-level path like /body.");
// Support regex=true prop as alternative to r"..." prefix
// CONSISTENCY(find-regex): mirror of WordHandler.Set.cs:60-61. grep
// "CONSISTENCY(find-regex)" for every project-wide call site.
@ -1012,6 +1005,22 @@ public partial class WordHandler
findValue = $"r\"{findValue}\"";
var (pattern, isRegex) = ParseFindPattern(findValue);
// Resolve parent to a paragraph — supports both paragraph-level and container-level (body/cell/sdt)
Paragraph para;
string paraPath;
if (parent is Paragraph p)
{
para = p;
paraPath = parentPath;
}
else
{
// Search across all child paragraphs in the container
(para, paraPath) = FindParagraphContainingText(parent, parentPath, pattern, isRegex)
?? throw new ArgumentException($"Text '{findValue}' not found in any paragraph under {parentPath}.");
}
var runTexts = BuildRunTexts(para);
if (runTexts.Count == 0)
throw new ArgumentException("Paragraph has no text content to search.");
@ -1029,14 +1038,39 @@ public partial class WordHandler
if (isInline)
{
return AddInlineAtSplitPoint(para, parentPath, splitPoint, type, position, properties);
return AddInlineAtSplitPoint(para, paraPath, splitPoint, type, position, properties);
}
else
{
return AddBlockAtSplitPoint(para, parentPath, splitPoint, type, position, properties);
return AddBlockAtSplitPoint(para, paraPath, splitPoint, type, position, properties);
}
}
/// <summary>
/// Scan the direct child paragraphs of <paramref name="container"/> in document order
/// and return the first one whose concatenated run text matches the pattern, together
/// with the path built for it under <paramref name="containerPath"/>.
/// Returns null when no child paragraph matches.
/// </summary>
private (Paragraph Para, string Path)? FindParagraphContainingText(
    OpenXmlElement container, string containerPath, string pattern, bool isRegex)
{
    // 1-based position among the container's direct paragraph children
    int ordinal = 0;
    foreach (var paragraph in container.Elements<Paragraph>().ToList())
    {
        ordinal++;

        var textRuns = BuildRunTexts(paragraph);
        if (textRuns.Count == 0)
            continue; // nothing searchable in this paragraph

        var joined = string.Concat(textRuns.Select(rt => rt.TextElement.Text));
        if (FindMatchRanges(joined, pattern, isRegex).Count <= 0)
            continue;

        var segment = BuildParaPathSegment(paragraph, ordinal);
        return (paragraph, $"{containerPath}/{segment}");
    }

    return null;
}
/// <summary>
/// Insert an inline element at a character split point within a paragraph.
/// Splits the run at the position and inserts the element.

View file

@ -171,6 +171,14 @@ public partial class WordHandler
return;
}
// Check for OLE object with preview image (e.g., embedded Visio diagrams)
var oleObj = run.Descendants().FirstOrDefault(e => e.LocalName == "object");
if (oleObj != null)
{
RenderOlePreviewHtml(sb, oleObj);
return;
}
// Footnote/endnote reference — render superscript number (don't return, run may also have text)
var fnRef = run.GetFirstChild<FootnoteReference>();
if (fnRef?.Id?.HasValue == true && fnRef.Id.Value > 0)
@ -280,6 +288,93 @@ public partial class WordHandler
sb.Append("</span>");
}
// ==================== OLE Object Rendering ====================
/// <summary>
/// Render an OLE object's preview image (v:imagedata inside w:object).
/// Handles embedded objects like Visio diagrams that use VML instead of DrawingML.
/// Appends an &lt;img&gt; when the preview resolves to a browser-renderable format,
/// otherwise a dashed placeholder box; appends nothing when the object has no
/// v:imagedata, no relationship id, or the image cannot be loaded.
/// </summary>
/// <param name="sb">HTML output buffer.</param>
/// <param name="oleObj">The w:object element found inside a run.</param>
private void RenderOlePreviewHtml(StringBuilder sb, OpenXmlElement oleObj)
{
    var imageData = oleObj.Descendants()
        .FirstOrDefault(e => e.LocalName == "imagedata");
    if (imageData == null) return;

    // Get r:id (relationship ID to the preview image part). The namespace check
    // distinguishes it from the element's own unqualified "id" attribute.
    string? relId = null;
    foreach (var attr in imageData.GetAttributes())
    {
        if (attr.LocalName == "id" && (attr.NamespaceUri?.Contains("relationships") ?? false))
        {
            relId = attr.Value;
            break;
        }
    }
    if (string.IsNullOrEmpty(relId)) return;

    var dataUri = LoadImageAsDataUri(relId);
    if (dataUri == null) return;

    // Get dimensions from v:shape style="width:Xpt;height:Ypt"
    double widthPt = 0, heightPt = 0;
    var shape = oleObj.Descendants()
        .FirstOrDefault(e => e.LocalName == "shape");
    if (shape != null)
    {
        var styleAttr = shape.GetAttributes()
            .FirstOrDefault(a => a.LocalName == "style").Value;
        if (styleAttr != null)
        {
            var wMatch = Regex.Match(styleAttr, @"width:([\d.]+)pt");
            var hMatch = Regex.Match(styleAttr, @"height:([\d.]+)pt");
            if (wMatch.Success) double.TryParse(wMatch.Groups[1].Value,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture, out widthPt);
            if (hMatch.Success) double.TryParse(hMatch.Groups[1].Value,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture, out heightPt);
        }
    }

    // Fallback to dxaOrig/dyaOrig (twips → pt). Both values are taken from the
    // original size when either is missing so the aspect ratio stays consistent.
    if (widthPt == 0 || heightPt == 0)
    {
        foreach (var attr in oleObj.GetAttributes())
        {
            if (attr.LocalName == "dxaOrig" && int.TryParse(attr.Value,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture, out var dxa))
                widthPt = dxa / 20.0;
            if (attr.LocalName == "dyaOrig" && int.TryParse(attr.Value,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture, out var dya))
                heightPt = dya / 20.0;
        }
    }

    var widthPx = widthPt > 0 ? (long)(widthPt * 96 / 72) : 0;
    var heightPx = heightPt > 0 ? (long)(heightPt * 96 / 72) : 0;

    // Check if the image format is browser-renderable. Only the media-type header of
    // the data URI is inspected: scanning the whole string would walk the (possibly
    // multi-megabyte) base64 payload, whose alphabet can spell "image/png" by chance.
    int headerEnd = dataUri.IndexOfAny(new[] { ';', ',' });
    var mediaType = headerEnd > 0 ? dataUri.Substring(0, headerEnd) : dataUri;
    bool IsType(string token) => mediaType.Contains(token, StringComparison.OrdinalIgnoreCase);
    bool isWebCompatible = IsType("image/png") || IsType("image/jpeg")
        || IsType("image/gif") || IsType("image/svg")
        || IsType("image/webp") || IsType("image/bmp");

    if (isWebCompatible)
    {
        var widthAttr = widthPx > 0 ? $" width=\"{widthPx}\"" : "";
        var heightAttr = heightPx > 0 ? $" height=\"{heightPx}\"" : "";
        var sizeStyle = widthPx > 0 ? $"max-width:100%;width:{widthPx}px;height:auto" : "max-width:100%";
        // The media type originates from the document package; escape it so it cannot
        // break out of the attribute. Base64 characters are unaffected by encoding.
        var safeSrc = System.Net.WebUtility.HtmlEncode(dataUri);
        sb.Append($"<img src=\"{safeSrc}\" alt=\"Embedded object\"{widthAttr}{heightAttr} style=\"{sizeStyle}\">");
    }
    else
    {
        // EMF/WMF/TIFF — browsers can't render natively, show placeholder with dimensions
        var ph = widthPx > 0 && heightPx > 0
            ? $"width:{widthPx}px;height:{heightPx}px;max-width:100%"
            : "min-width:200px;min-height:100px";
        sb.Append($"<div style=\"{ph};border:1px dashed #bbb;background:#f5f5f5;display:flex;align-items:center;justify-content:center;color:#888;font-size:13px;margin:8px 0\">");
        sb.Append("\U0001F4CE Embedded Object (preview not supported in browser)");
        sb.Append("</div>");
    }
}
// Footnote/endnote reference tracking is in _ctx.FootnoteRefs / _ctx.EndnoteRefs
private void RenderFootnotesHtml(StringBuilder sb)

View file

@ -267,7 +267,15 @@ public partial class WordHandler
var bot=children[ci].offsetTop+children[ci].offsetHeight-body.offsetTop;
if(bot>availH){splitIdx=ci;break;}
}
if(splitIdx<=0)continue;
if(splitIdx<0)continue;
// When the first child itself exceeds page height, keep it and split after
if(splitIdx===0)splitIdx=1;
// Collect movable children from splitIdx onward
var toMove=[];
for(var mi=splitIdx;mi<children.length;mi++){
if(!children[mi].classList.contains('footnotes'))toMove.push(children[mi]);
}
if(toMove.length===0)continue; // irreducibly oversized single element
// Create new page wrapped in page-wrapper
var nw=document.createElement('div');
nw.className='page-wrapper';
@ -276,11 +284,6 @@ public partial class WordHandler
np.style.cssText=page.style.cssText;
var nb=document.createElement('div');
nb.className='page-body';
// Move overflow children to new page (skip footnotes — they stay on the reference page)
var toMove=[];
for(var mi=splitIdx;mi<children.length;mi++){
if(!children[mi].classList.contains('footnotes'))toMove.push(children[mi]);
}
for(var mi=0;mi<toMove.length;mi++){
nb.appendChild(toMove[mi]);
}
@ -317,12 +320,15 @@ public partial class WordHandler
var f=b.querySelector('.footnotes');
var fh=f?f.offsetHeight:0;
var ch=0;
var visibleCount=0;
Array.from(b.children).forEach(function(c){
if(c.classList.contains('footnotes'))return;
var bt=c.offsetTop+c.offsetHeight-b.offsetTop;
if(bt>ch)ch=bt;
if(c.offsetHeight>0)visibleCount++;
});
if(ch>maxBodyH-fh+2)again=true;
// Only re-paginate if overflow AND more than one visible child to split
if(ch>maxBodyH-fh+2 && visibleCount>1)again=true;
});
if(again)setTimeout(paginate,0);
else{setTimeout(positionFootnotes,0);setTimeout(applyPageFilter,0);setTimeout(function(){scalePages(false);},0);}
@ -762,6 +768,7 @@ public partial class WordHandler
var numIdLevelOffset = new Dictionary<int, int>(); // numId → effective ilvl offset for cross-numId nesting
var olCountPerLevel = new Dictionary<int, int>(); // ilvl → running <ol> item count for `start` attribute
var multiLevelCounters = new Dictionary<int, int>(); // ilvl → counter for multi-level numbering
var headingCounters = new Dictionary<int, int>(); // ilvl → counter for heading auto-numbering (from style numPr)
bool pendingLiClose = false; // defer </li> to allow nested lists inside
bool inMultiColumn = false; // track whether we're inside a multi-column div
@ -1056,6 +1063,31 @@ public partial class WordHandler
if (!string.IsNullOrEmpty(hStyle))
sb.Append($" style=\"{hStyle}\"");
sb.Append(">");
// Heading auto-numbering from style (e.g., "1", "1.1", "1.2.1")
var hNumPr = ResolveNumPrFromStyle(para);
if (hNumPr != null)
{
var (hNumId, hIlvl) = hNumPr.Value;
headingCounters[hIlvl] = headingCounters.GetValueOrDefault(hIlvl, 0) + 1;
// Reset deeper level counters
for (int lk = hIlvl + 1; lk <= 8; lk++)
if (headingCounters.ContainsKey(lk)) headingCounters[lk] = 0;
var lvlText = GetLevelText(hNumId, hIlvl);
if (lvlText != null)
{
var numStr = lvlText;
for (int lk = 0; lk <= hIlvl; lk++)
numStr = numStr.Replace($"%{lk + 1}",
headingCounters.GetValueOrDefault(lk, 0).ToString());
// Skip if paragraph text already starts with the number (avoid duplication)
var paraText = GetParagraphText(para).TrimStart();
if (!paraText.StartsWith(numStr, StringComparison.Ordinal))
sb.Append($"<span class=\"heading-num\" style=\"margin-right:0.5em\">{HtmlEncode(numStr)}</span>");
}
}
RenderParagraphContentHtml(sb, para);
sb.AppendLine($"</h{headingLevel}>");
if (hasReflect)

View file

@ -1,6 +1,7 @@
// Copyright 2025 OfficeCli (officecli.ai)
// SPDX-License-Identifier: Apache-2.0
using System.Runtime.Versioning;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
using OfficeCli.Core;
@ -203,6 +204,224 @@ public partial class WordHandler
if (extent?.Cy != null) node.Format["height"] = $"{extent.Cy.Value / 360000.0:F1}cm";
if (docProps?.Description?.Value != null) node.Format["alt"] = docProps.Description.Value;
// Detect wrap type and position from inline/anchor
var inlineEl = drawing.GetFirstChild<DW.Inline>();
var anchorEl = drawing.GetFirstChild<DW.Anchor>();
if (inlineEl != null)
{
node.Format["wrap"] = "inline";
}
else if (anchorEl != null)
{
node.Format["wrap"] = DetectWrapType(anchorEl);
if (anchorEl.BehindDoc?.Value == true)
node.Format["behindText"] = true;
var hPos = anchorEl.GetFirstChild<DW.HorizontalPosition>();
if (hPos != null)
{
var offset = hPos.GetFirstChild<DW.PositionOffset>();
if (offset != null && long.TryParse(offset.Text, out var hEmu))
node.Format["hPosition"] = $"{hEmu / 360000.0:F1}cm";
if (hPos.RelativeFrom?.HasValue == true)
node.Format["hRelative"] = hPos.RelativeFrom.InnerText;
}
var vPos = anchorEl.GetFirstChild<DW.VerticalPosition>();
if (vPos != null)
{
var offset = vPos.GetFirstChild<DW.PositionOffset>();
if (offset != null && long.TryParse(offset.Text, out var vEmu))
node.Format["vPosition"] = $"{vEmu / 360000.0:F1}cm";
if (vPos.RelativeFrom?.HasValue == true)
node.Format["vRelative"] = vPos.RelativeFrom.InnerText;
}
}
return node;
}
/// <summary>
/// Map the wrap child element of a floating-image anchor to its textual wrap name.
/// Checks are made in a fixed priority order (none, square, tight, through,
/// topandbottom); an anchor with no wrap element is reported as "none".
/// </summary>
private static string DetectWrapType(DW.Anchor anchor)
{
    bool Has<TWrap>() where TWrap : OpenXmlElement => anchor.GetFirstChild<TWrap>() != null;

    return Has<DW.WrapNone>() ? "none"
        : Has<DW.WrapSquare>() ? "square"
        : Has<DW.WrapTight>() ? "tight"
        : Has<DW.WrapThrough>() ? "through"
        : Has<DW.WrapTopBottom>() ? "topandbottom"
        : "none";
}
/// <summary>
/// Replace the text-wrap element of a floating-image anchor.
/// </summary>
/// <param name="anchor">The wp:anchor element to modify.</param>
/// <param name="wrapType">
/// One of none, square, tight, through, topandbottom (or topbottom); case-insensitive.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="wrapType"/> is not a recognized value. The anchor is left untouched.
/// </exception>
private static void ReplaceWrapElement(DW.Anchor anchor, string wrapType)
{
    // Default rectangular wrap outline used by tight/through wrapping.
    static DW.WrapPolygon FullRectanglePolygon() => new DW.WrapPolygon(
        new DW.StartPoint { X = 0, Y = 0 },
        new DW.LineTo { X = 21600, Y = 0 },
        new DW.LineTo { X = 21600, Y = 21600 },
        new DW.LineTo { X = 0, Y = 21600 },
        new DW.LineTo { X = 0, Y = 0 }
    ) { Edited = false };

    // Build (and thereby validate) the replacement BEFORE touching the anchor, so an
    // invalid value cannot leave the anchor stripped of its wrap element.
    OpenXmlElement newWrap = wrapType.ToLowerInvariant() switch
    {
        "square" => new DW.WrapSquare { WrapText = DW.WrapTextValues.BothSides },
        "tight" => new DW.WrapTight(FullRectanglePolygon()),
        "through" => new DW.WrapThrough(FullRectanglePolygon()),
        "topandbottom" or "topbottom" => new DW.WrapTopBottom(),
        "none" => new DW.WrapNone(),
        _ => throw new ArgumentException($"Invalid wrap value: '{wrapType}'. Valid values: none, square, tight, through, topandbottom.")
    };

    // Remove existing wrap element
    anchor.GetFirstChild<DW.WrapNone>()?.Remove();
    anchor.GetFirstChild<DW.WrapSquare>()?.Remove();
    anchor.GetFirstChild<DW.WrapTight>()?.Remove();
    anchor.GetFirstChild<DW.WrapThrough>()?.Remove();
    anchor.GetFirstChild<DW.WrapTopBottom>()?.Remove();

    // Insert wrap after EffectExtent (standard OOXML order). EffectExtent is optional;
    // when absent the wrap belongs right after Extent, never before the position elements.
    OpenXmlElement? predecessor =
        (OpenXmlElement?)anchor.GetFirstChild<DW.EffectExtent>() ?? anchor.GetFirstChild<DW.Extent>();
    if (predecessor != null)
        predecessor.InsertAfterSelf(newWrap);
    else
        anchor.PrependChild(newWrap);
}
/// <summary>
/// Build the "ole" document node for an embedded object found in a run.
/// Records the ProgID (also used as the node text), the width/height parsed from the
/// VML shape style, and — on Windows only — the path and original content type of
/// the extracted preview image.
/// </summary>
private DocumentNode CreateOleNode(EmbeddedObject oleObj, Run run, string path)
{
    var result = new DocumentNode { Path = path, Type = "ole", Text = "" };
    result.Format["objectType"] = "ole";

    // ProgID lives on the o:OLEObject descendant
    var progId = oleObj.Descendants()
        .FirstOrDefault(el => el.LocalName == "OLEObject")
        ?.GetAttributes()
        .FirstOrDefault(a => a.LocalName == "ProgID").Value;
    if (progId != null)
    {
        result.Format["progId"] = progId;
        result.Text = progId;
    }

    // Dimensions come from the v:shape style attribute
    var shapeStyle = oleObj.Descendants()
        .FirstOrDefault(el => el.LocalName == "shape")
        ?.GetAttributes()
        .FirstOrDefault(a => a.LocalName == "style").Value;
    if (shapeStyle != null)
    {
        ParseVmlStyle(shapeStyle, result);
    }

    // Preview image from v:imagedata (Windows only — requires GDI+)
    (string? File, string? ContentType) preview = (null, null);
    if (OperatingSystem.IsWindowsVersionAtLeast(6, 1))
    {
        preview = ExtractOlePreviewImage(oleObj, path);
    }

    if (preview.File == null)
    {
        return result;
    }

    result.Format["previewImage"] = preview.File;
    if (preview.ContentType != null)
    {
        result.Format["previewContentType"] = preview.ContentType;
    }
    return result;
}
/// <summary>
/// Extract the OLE preview image referenced by v:imagedata, write it as a PNG file in
/// the temp directory (converting EMF/WMF and other rasters via System.Drawing; PNG
/// parts are copied verbatim), and return its location.
/// </summary>
/// <param name="oleObj">The embedded object whose v:shape/v:imagedata is inspected.</param>
/// <param name="nodePath">Node path used to derive a stable temp file name.</param>
/// <returns>
/// (pngPath, originalContentType), or (null, null) when there is no preview image or
/// the extraction/conversion fails.
/// </returns>
[SupportedOSPlatform("windows6.1")]
private (string? path, string? contentType) ExtractOlePreviewImage(EmbeddedObject oleObj, string nodePath)
{
    var mainPart = _doc.MainDocumentPart;
    if (mainPart == null) return (null, null);

    // Find v:imagedata element and its r:id
    var shape = oleObj.Descendants().FirstOrDefault(e => e.LocalName == "shape");
    if (shape == null) return (null, null);

    var imageData = shape.Descendants().FirstOrDefault(e => e.LocalName == "imagedata");
    if (imageData == null) return (null, null);

    // v:imagedata may carry both an unqualified "id" and the relationship "r:id".
    // Prefer the relationships-namespace attribute (as the HTML preview renderer does);
    // fall back to the first "id" attribute for documents with unusual namespaces.
    var idAttributes = imageData.GetAttributes().Where(a => a.LocalName == "id").ToList();
    var rId = idAttributes.FirstOrDefault(a => a.NamespaceUri?.Contains("relationships") ?? false).Value
        ?? idAttributes.FirstOrDefault().Value;
    if (string.IsNullOrEmpty(rId)) return (null, null);

    try
    {
        var imgPart = mainPart.GetPartById(rId);
        using var stream = imgPart.GetStream();
        using var ms = new MemoryStream();
        stream.CopyTo(ms);
        ms.Position = 0;

        // Media types are case-insensitive, so compare accordingly.
        var contentType = imgPart.ContentType ?? "";
        bool HasToken(string token) => contentType.Contains(token, StringComparison.OrdinalIgnoreCase);
        var isMetafile = HasToken("emf") || HasToken("wmf") || HasToken("metafile");

        // Build a stable file name from the node path
        var safeId = nodePath.Replace("/", "_").Replace("[", "").Replace("]", "").TrimStart('_');
        var pngPath = Path.Combine(Path.GetTempPath(), $"officecli_ole_{safeId}.png");

        if (!isMetafile && HasToken("png"))
        {
            // Already PNG — copy the bytes untouched.
            using var fs = new FileStream(pngPath, FileMode.Create);
            ms.CopyTo(fs);
        }
        else
        {
            // EMF/WMF, JPEG or other raster — convert to PNG using System.Drawing (Windows GDI+)
            using var img = System.Drawing.Image.FromStream(ms);
            img.Save(pngPath, System.Drawing.Imaging.ImageFormat.Png);
        }

        return (pngPath, contentType);
    }
    catch
    {
        // Best effort: a missing part or an undecodable image simply means "no preview".
        return (null, null);
    }
}
/// <summary>
/// Read the width and height declarations out of a VML style string
/// (semicolon-separated "name:value" pairs) and store them, converted via
/// ConvertPtToCm, in the node's "width" / "height" format entries.
/// Property names are matched case-insensitively; other declarations are ignored.
/// </summary>
private static void ParseVmlStyle(string style, DocumentNode node)
{
    var declarations = style.Split(';', StringSplitOptions.RemoveEmptyEntries);
    foreach (var declaration in declarations)
    {
        // Split on the first colon only so values containing ':' stay intact
        var pieces = declaration.Split(':', 2);
        if (pieces.Length == 2)
        {
            var value = pieces[1].Trim();
            switch (pieces[0].Trim().ToLowerInvariant())
            {
                case "width":
                    node.Format["width"] = ConvertPtToCm(value);
                    break;
                case "height":
                    node.Format["height"] = ConvertPtToCm(value);
                    break;
            }
        }
    }
}
/// <summary>
/// Convert a VML length expressed in points ("385.45pt") or inches ("4in") to a
/// centimetre string with one decimal ("13.6cm").
/// </summary>
/// <param name="ptValue">The length text, unit suffix included (case-insensitive).</param>
/// <returns>
/// The converted value formatted with the invariant culture, or
/// <paramref name="ptValue"/> unchanged when the unit is not pt/in or the number
/// cannot be parsed.
/// </returns>
private static string ConvertPtToCm(string ptValue)
{
    var text = ptValue.Trim();

    bool isPoints = text.EndsWith("pt", StringComparison.OrdinalIgnoreCase);
    bool isInches = !isPoints && text.EndsWith("in", StringComparison.OrdinalIgnoreCase);
    if (!isPoints && !isInches)
        return ptValue; // unknown or missing unit — return as-is

    // Strip only the trailing unit so the check above and the parse below agree
    var num = text.Substring(0, text.Length - 2).Trim();
    if (!double.TryParse(num, System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture, out var val))
        return ptValue; // return as-is if unparseable

    double cm = isPoints ? val * 2.54 / 72.0 : val * 2.54;

    // Format with the invariant culture: the value is machine-readable and was parsed
    // with the invariant culture, so it must not pick up a locale decimal comma.
    return cm.ToString("F1", System.Globalization.CultureInfo.InvariantCulture) + "cm";
}
}

View file

@ -836,6 +836,8 @@ public partial class WordHandler
parsed.Element == "bookmark";
bool isSdtSelector = parsed.ChildSelector == null &&
(parsed.Element == "sdt" || parsed.Element == "contentcontrol");
bool isOleSelector = parsed.ChildSelector == null &&
(parsed.Element is "ole" or "object" or "embed");
// Scheme B: generic XML fallback for unrecognized element types
// Use GenericXmlQuery.ParseSelector which properly handles namespace prefixes (e.g., "a:ln")
@ -855,7 +857,8 @@ public partial class WordHandler
or "style"
or "revision" or "change" or "trackchange"
or "media"
or "hyperlink";
or "hyperlink"
or "ole" or "object" or "embed";
if (!isKnownType && parsed.ChildSelector == null)
{
var root = _doc.MainDocumentPart?.Document;
@ -1434,6 +1437,27 @@ public partial class WordHandler
results.Add(CreateImageNode(drawing, run, $"/body/{BuildParaPathSegment(para, paraIdx + 1)}/r[{runIdx + 1}]"));
}
}
// Also detect OLE embedded objects (Visio, Excel, etc.)
var oleObject = run.GetFirstChild<EmbeddedObject>();
if (oleObject != null)
{
results.Add(CreateOleNode(oleObject, run, $"/body/{BuildParaPathSegment(para, paraIdx + 1)}/r[{runIdx + 1}]"));
}
runIdx++;
}
}
else if (isOleSelector)
{
int runIdx = 0;
foreach (var run in GetAllRuns(para))
{
var oleObject = run.GetFirstChild<EmbeddedObject>();
if (oleObject != null)
{
results.Add(CreateOleNode(oleObject, run, $"/body/{BuildParaPathSegment(para, paraIdx + 1)}/r[{runIdx + 1}]"));
}
runIdx++;
}
}

View file

@ -1038,6 +1038,62 @@ public partial class WordHandler
blip.Embed = mainPartImg.GetIdOfPart(newImgPart);
break;
}
case "wrap":
{
var drawingWrap = run.GetFirstChild<Drawing>();
var anchorWrap = drawingWrap?.GetFirstChild<DW.Anchor>();
if (anchorWrap == null) { unsupported.Add(key); break; }
ReplaceWrapElement(anchorWrap, value);
break;
}
case "hposition":
{
var drawingHP = run.GetFirstChild<Drawing>();
var anchorHP = drawingHP?.GetFirstChild<DW.Anchor>();
var hPosEl = anchorHP?.GetFirstChild<DW.HorizontalPosition>();
if (hPosEl == null) { unsupported.Add(key); break; }
var hOffset = hPosEl.GetFirstChild<DW.PositionOffset>();
if (hOffset != null) hOffset.Text = ParseEmu(value).ToString();
else hPosEl.AppendChild(new DW.PositionOffset(ParseEmu(value).ToString()));
break;
}
case "vposition":
{
var drawingVP = run.GetFirstChild<Drawing>();
var anchorVP = drawingVP?.GetFirstChild<DW.Anchor>();
var vPosEl = anchorVP?.GetFirstChild<DW.VerticalPosition>();
if (vPosEl == null) { unsupported.Add(key); break; }
var vOffset = vPosEl.GetFirstChild<DW.PositionOffset>();
if (vOffset != null) vOffset.Text = ParseEmu(value).ToString();
else vPosEl.AppendChild(new DW.PositionOffset(ParseEmu(value).ToString()));
break;
}
case "hrelative":
{
var drawingHR = run.GetFirstChild<Drawing>();
var anchorHR = drawingHR?.GetFirstChild<DW.Anchor>();
var hPosHR = anchorHR?.GetFirstChild<DW.HorizontalPosition>();
if (hPosHR == null) { unsupported.Add(key); break; }
hPosHR.RelativeFrom = ParseHorizontalRelative(value);
break;
}
case "vrelative":
{
var drawingVR = run.GetFirstChild<Drawing>();
var anchorVR = drawingVR?.GetFirstChild<DW.Anchor>();
var vPosVR = anchorVR?.GetFirstChild<DW.VerticalPosition>();
if (vPosVR == null) { unsupported.Add(key); break; }
vPosVR.RelativeFrom = ParseVerticalRelative(value);
break;
}
case "behindtext":
{
var drawingBT = run.GetFirstChild<Drawing>();
var anchorBT = drawingBT?.GetFirstChild<DW.Anchor>();
if (anchorBT == null) { unsupported.Add(key); break; }
anchorBT.BehindDoc = value.Equals("true", StringComparison.OrdinalIgnoreCase);
break;
}
case "link":
{
var mainPart3 = _doc.MainDocumentPart!;

View file

@ -326,6 +326,50 @@ public partial class WordHandler
// ==================== List / Numbering ====================
/// <summary>
/// Resolve numbering properties (numId, ilvl) from the paragraph's style chain.
/// Checks direct paragraph numPr first, then walks the style hierarchy via basedOn.
/// Used to detect heading auto-numbering defined in styles.
/// </summary>
/// <remarks>
/// A numId of 0 never points at a numbering instance; it marks numbering as
/// removed at that level of the hierarchy. The first numId encountered therefore
/// settles the lookup: non-zero yields a result, zero yields <c>null</c> without
/// consulting further base styles.
/// </remarks>
/// <param name="para">Paragraph whose effective numbering is resolved.</param>
/// <returns>
/// The numbering id and level (level defaults to 0 when absent), or <c>null</c>
/// when no numbering applies or it has been explicitly removed.
/// </returns>
private (int numId, int ilvl)? ResolveNumPrFromStyle(Paragraph para)
{
    // Returns true when numPr carries a numId and therefore settles the lookup;
    // 'resolved' stays null for numId 0 (numbering explicitly removed).
    static bool TrySettle(NumberingProperties? numPr, out (int numId, int ilvl)? resolved)
    {
        resolved = null;
        var numId = numPr?.NumberingId?.Val?.Value;
        if (numPr == null || numId == null) return false;
        if (numId.Value != 0)
            resolved = (numId.Value, numPr.NumberingLevelReference?.Val?.Value ?? 0);
        return true;
    }

    // 1. Direct numPr on the paragraph
    if (TrySettle(para.ParagraphProperties?.NumberingProperties, out var direct))
        return direct;

    // 2. Walk the style chain
    var styleId = para.ParagraphProperties?.ParagraphStyleId?.Val?.Value;
    if (styleId == null) return null;
    var stylesPart = _doc.MainDocumentPart?.StyleDefinitionsPart;
    if (stylesPart?.Styles == null) return null;

    // 'visited' guards against a cyclic basedOn chain.
    var visited = new HashSet<string>();
    while (styleId != null && visited.Add(styleId))
    {
        var style = stylesPart.Styles.Elements<Style>()
            .FirstOrDefault(s => s.StyleId?.Value == styleId);
        if (style == null) break;

        if (TrySettle(style.StyleParagraphProperties?.NumberingProperties, out var fromStyle))
            return fromStyle;

        styleId = style.BasedOn?.Val?.Value;
    }
    return null;
}
private string? GetParagraphListStyle(Paragraph para)
{
var numProps = para.ParagraphProperties?.NumberingProperties;

View file

@ -529,10 +529,12 @@ public partial class WordHandler
var paragraphs = GetBodyElements(body).OfType<Paragraph>().ToList();
var tables = GetBodyElements(body).OfType<Table>().ToList();
var imageCount = body.Descendants<Drawing>().Count();
var oleCount = body.Descendants<EmbeddedObject>().Count();
var equationCount = body.Descendants().Count(e => e.LocalName == "oMathPara" || e is M.Paragraph);
var formFieldCount = FindFormFields().Count;
var contentControlCount = body.Descendants<SdtBlock>().Count() + body.Descendants<SdtRun>().Count();
var statsLine = $"File: {Path.GetFileName(_filePath)} | {paragraphs.Count} paragraphs | {tables.Count} tables | {imageCount} images";
if (oleCount > 0) statsLine += $" | {oleCount} OLE objects";
if (equationCount > 0) statsLine += $" | {equationCount} equations";
if (formFieldCount > 0) statsLine += $" | {formFieldCount} formfields";
if (contentControlCount > 0) statsLine += $" | {contentControlCount} content controls";
@ -734,6 +736,7 @@ public partial class WordHandler
var paragraphs = GetBodyElements(body).OfType<Paragraph>().ToList();
var tables = GetBodyElements(body).OfType<Table>().ToList();
var imageCount = body.Descendants<Drawing>().Count();
var oleCount = body.Descendants<EmbeddedObject>().Count();
var equationCount = body.Descendants().Count(e => e.LocalName == "oMathPara" || e is M.Paragraph);
var formFieldCount = FindFormFields().Count;
@ -747,6 +750,7 @@ public partial class WordHandler
["images"] = imageCount,
["equations"] = equationCount
};
if (oleCount > 0) result["oleObjects"] = oleCount;
if (formFieldCount > 0) result["formfields"] = formFieldCount;
if (contentControlCount > 0) result["contentControls"] = contentControlCount;

View file

@ -17,6 +17,7 @@
<ItemGroup>
<PackageReference Include="DocumentFormat.OpenXml" Version="3.4.1" />
<PackageReference Include="System.CommandLine" Version="3.0.0-preview.2.26159.112" />
<PackageReference Include="System.Drawing.Common" Version="10.0.5" />
</ItemGroup>
<ItemGroup Condition="Exists('wiki/')">

View file

@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project: references the officecli project below and is excluded from packing. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="coverlet.collector" Version="6.0.4" />
<!-- NOTE(review): another csproj in this change references DocumentFormat.OpenXml 3.4.1; confirm the 3.5.1 here is intentional. -->
<PackageReference Include="DocumentFormat.OpenXml" Version="3.5.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.14.1" />
<PackageReference Include="xunit" Version="2.9.3" />
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.4" />
</ItemGroup>
<ItemGroup>
<!-- Global using for the Xunit namespace, so test files need no explicit `using Xunit;`. -->
<Using Include="Xunit" />
</ItemGroup>
<ItemGroup>
<!-- Project under test. -->
<ProjectReference Include="..\..\src\officecli\officecli.csproj" />
</ItemGroup>
</Project>

View file

@ -0,0 +1,386 @@
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml.Vml;
using DocumentFormat.OpenXml.Vml.Office;
using OfficeCli.Core;
using OfficeCli.Handlers;
using A = DocumentFormat.OpenXml.Drawing;
using DW = DocumentFormat.OpenXml.Drawing.Wordprocessing;
using PIC = DocumentFormat.OpenXml.Drawing.Pictures;
namespace OfficeCli.Tests;
/// <summary>
/// Tests for <c>WordHandler</c> picture / OLE queries and the outline statistics.
/// Each test builds a small .docx inside a per-instance temporary directory,
/// which is deleted again in <see cref="Dispose"/>.
/// </summary>
public class OleAndImageTests : IDisposable
{
    /// <summary>GraphicData URI that marks the payload as a DrawingML picture.</summary>
    private const string PictureGraphicUri = "http://schemas.openxmlformats.org/drawingml/2006/picture";

    private readonly string _testDir;

    public OleAndImageTests()
    {
        // System.IO.Path is fully qualified because DocumentFormat.OpenXml.Vml also declares a Path type.
        _testDir = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "officecli_tests_" + Guid.NewGuid().ToString("N")[..8]);
        Directory.CreateDirectory(_testDir);
    }

    /// <summary>Removes the temporary directory and everything the tests wrote into it.</summary>
    public void Dispose()
    {
        try
        {
            Directory.Delete(_testDir, true);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Best-effort cleanup: a locked or already-removed temp directory must not fail the test.
        }
    }

    /// <summary>
    /// Creates a new .docx with an empty body in the test directory, lets
    /// <paramref name="configure"/> populate it, and returns its path. The
    /// document is disposed before the method returns.
    /// </summary>
    private string CreateTestDocx(Action<WordprocessingDocument> configure)
    {
        var path = System.IO.Path.Combine(_testDir, $"test_{Guid.NewGuid():N}.docx");
        using var doc = WordprocessingDocument.Create(path, WordprocessingDocumentType.Document);
        var mainPart = doc.AddMainDocumentPart();
        mainPart.Document = new Document(new Body());
        configure(doc);
        return path;
    }

    /// <summary>Adds the minimal PNG as a new image part and returns its relationship id.</summary>
    private static string AddMinimalPngPart(MainDocumentPart mainPart)
    {
        var imgPart = mainPart.AddImagePart(ImagePartType.Png);
        using (var ms = new MemoryStream(CreateMinimalPng()))
            imgPart.FeedData(ms);
        return mainPart.GetIdOfPart(imgPart);
    }

    /// <summary>
    /// Builds the <c>a:graphic</c> element wrapping a rectangular picture that
    /// references <paramref name="relId"/> and has the given extents (EMU).
    /// </summary>
    private static A.Graphic CreatePictureGraphic(string relId, uint id, string name, long cx, long cy)
    {
        var picture = new PIC.Picture(
            new PIC.NonVisualPictureProperties(
                new PIC.NonVisualDrawingProperties { Id = id, Name = name },
                new PIC.NonVisualPictureDrawingProperties()),
            new PIC.BlipFill(
                new A.Blip { Embed = relId },
                new A.Stretch(new A.FillRectangle())),
            new PIC.ShapeProperties(
                new A.Transform2D(
                    new A.Offset { X = 0, Y = 0 },
                    new A.Extents { Cx = cx, Cy = cy }),
                new A.PresetGeometry(new A.AdjustValueList()) { Preset = A.ShapeTypeValues.Rectangle }));

        return new A.Graphic(new A.GraphicData(picture) { Uri = PictureGraphicUri });
    }

    /// <summary>Creates an inline image Drawing element for testing.</summary>
    private static Run CreateInlineImageRun(MainDocumentPart mainPart, uint docPropId = 1)
    {
        // Add a tiny 1x1 PNG as image part
        var relId = AddMinimalPngPart(mainPart);
        long cx = 3600000; // 10cm
        long cy = 1800000; // 5cm

        var inline = new DW.Inline(
            new DW.Extent { Cx = cx, Cy = cy },
            new DW.EffectExtent { LeftEdge = 0, TopEdge = 0, RightEdge = 0, BottomEdge = 0 },
            new DW.DocProperties { Id = docPropId, Name = "test_image.png", Description = "Test inline image" },
            new DW.NonVisualGraphicFrameDrawingProperties(new A.GraphicFrameLocks { NoChangeAspect = true }),
            CreatePictureGraphic(relId, docPropId, "test_image.png", cx, cy))
        {
            DistanceFromTop = 0U, DistanceFromBottom = 0U, DistanceFromLeft = 0U, DistanceFromRight = 0U
        };
        return new Run(new Drawing(inline));
    }

    /// <summary>Creates a floating (anchor) image Drawing element with specified wrap.</summary>
    /// <param name="mainPart">Part that receives the backing image part.</param>
    /// <param name="wrapType">"square" or "tight"; any other value (including "none") yields no wrapping.</param>
    /// <param name="docPropId">Id used for the drawing's document and picture properties.</param>
    private static Run CreateAnchorImageRun(MainDocumentPart mainPart, string wrapType, uint docPropId = 2)
    {
        var relId = AddMinimalPngPart(mainPart);
        long cx = 2160000; // 6cm
        long cy = 1440000; // 4cm
        long hPos = 720000; // 2cm
        long vPos = 360000; // 1cm

        OpenXmlElement wrapElement = wrapType switch
        {
            "square" => new DW.WrapSquare { WrapText = DW.WrapTextValues.BothSides },
            "tight" => new DW.WrapTight(new DW.WrapPolygon(
                new DW.StartPoint { X = 0, Y = 0 },
                new DW.LineTo { X = 21600, Y = 0 },
                new DW.LineTo { X = 21600, Y = 21600 },
                new DW.LineTo { X = 0, Y = 21600 },
                new DW.LineTo { X = 0, Y = 0 }
            ) { Edited = false }),
            _ => new DW.WrapNone() // "none" and any unrecognised value
        };

        var anchor = new DW.Anchor(
            new DW.SimplePosition { X = 0, Y = 0 },
            new DW.HorizontalPosition(new DW.PositionOffset(hPos.ToString()))
                { RelativeFrom = DW.HorizontalRelativePositionValues.Column },
            new DW.VerticalPosition(new DW.PositionOffset(vPos.ToString()))
                { RelativeFrom = DW.VerticalRelativePositionValues.Paragraph },
            new DW.Extent { Cx = cx, Cy = cy },
            new DW.EffectExtent { LeftEdge = 0, TopEdge = 0, RightEdge = 0, BottomEdge = 0 },
            wrapElement,
            new DW.DocProperties { Id = docPropId, Name = "anchor_image.png", Description = "Floating image" },
            new DW.NonVisualGraphicFrameDrawingProperties(new A.GraphicFrameLocks { NoChangeAspect = true }),
            CreatePictureGraphic(relId, docPropId, "anchor_image.png", cx, cy))
        {
            BehindDoc = false,
            DistanceFromTop = 0U, DistanceFromBottom = 0U,
            DistanceFromLeft = 114300U, DistanceFromRight = 114300U,
            SimplePos = false, RelativeHeight = 1U,
            AllowOverlap = true, LayoutInCell = true, Locked = false
        };
        return new Run(new Drawing(anchor));
    }

    /// <summary>Creates a minimal OLE embedded object (simulates Visio.Drawing.11).</summary>
    /// <param name="progId">Value of the <c>ProgID</c> attribute on the <c>o:OLEObject</c> child.</param>
    /// <param name="width">Width written into the <c>v:shape</c> style attribute.</param>
    /// <param name="height">Height written into the <c>v:shape</c> style attribute.</param>
    private static Run CreateOleObjectRun(string progId = "Visio.Drawing.11", string width = "385.45pt", string height = "397.75pt")
    {
        // The VML shape and OLE children are built as unknown elements carrying only
        // the attributes these tests rely on.
        var shape = new OpenXmlUnknownElement("v", "shape", "urn:schemas-microsoft-com:vml");
        shape.SetAttribute(new OpenXmlAttribute("style", "", $"width:{width};height:{height}"));

        var oleEl = new OpenXmlUnknownElement("o", "OLEObject", "urn:schemas-microsoft-com:office:office");
        oleEl.SetAttribute(new OpenXmlAttribute("ProgID", "", progId));

        var embeddedObject = new EmbeddedObject();
        embeddedObject.AppendChild(shape);
        embeddedObject.AppendChild(oleEl);
        return new Run(embeddedObject);
    }

    private static byte[] CreateMinimalPng()
    {
        // Minimal valid 1x1 white PNG
        return Convert.FromBase64String(
            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==");
    }

    // ===================== Tests =====================

    /// <summary>An inline drawing is reported as a picture with wrap "inline".</summary>
    [Fact]
    public void Query_Picture_DetectsInlineImage()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            var para = new Paragraph(CreateInlineImageRun(doc.MainDocumentPart!));
            body.AppendChild(para);
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("picture");

        Assert.Single(results);
        Assert.Equal("picture", results[0].Type);
        Assert.Equal("inline", results[0].Format["wrap"]);
    }

    /// <summary>An anchored drawing reports its wrap type, offsets and relative-from settings.</summary>
    [Fact]
    public void Query_Picture_DetectsAnchorImageWithWrapType()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateAnchorImageRun(doc.MainDocumentPart!, "square")));
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("picture");

        Assert.Single(results);
        Assert.Equal("picture", results[0].Type);
        Assert.Equal("square", results[0].Format["wrap"]);
        Assert.Equal("2.0cm", results[0].Format["hPosition"]);
        Assert.Equal("1.0cm", results[0].Format["vPosition"]);
        Assert.Equal("column", results[0].Format["hRelative"]);
        Assert.Equal("paragraph", results[0].Format["vRelative"]);
    }

    /// <summary>The "picture" selector also returns embedded OLE objects, typed "ole".</summary>
    [Fact]
    public void Query_Picture_DetectsOleObject()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("picture");

        Assert.Single(results);
        Assert.Equal("ole", results[0].Type);
        Assert.Equal("Visio.Drawing.11", results[0].Format["progId"]);
    }

    /// <summary>Drawings and OLE objects are returned together, in document order.</summary>
    [Fact]
    public void Query_Picture_ReturnsBothDrawingAndOle()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateInlineImageRun(doc.MainDocumentPart!, 1)));
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
            body.AppendChild(new Paragraph(CreateOleObjectRun("Excel.Sheet.12", "200pt", "150pt")));
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("picture");

        Assert.Equal(3, results.Count);
        Assert.Equal("picture", results[0].Type);
        Assert.Equal("ole", results[1].Type);
        Assert.Equal("ole", results[2].Type);
        Assert.Equal("Excel.Sheet.12", results[2].Format["progId"]);
    }

    /// <summary>The "ole" selector excludes ordinary drawings.</summary>
    [Fact]
    public void Query_Ole_OnlyReturnsOleObjects()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateInlineImageRun(doc.MainDocumentPart!, 1)));
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("ole");

        Assert.Equal(2, results.Count);
        Assert.All(results, r => Assert.Equal("ole", r.Type));
    }

    /// <summary>"object" and "ole" select the same elements.</summary>
    [Fact]
    public void Query_Object_IsAliasForOle()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
        });

        using var handler = new WordHandler(path, false);
        var oleResults = handler.Query("ole");
        var objectResults = handler.Query("object");

        Assert.Single(oleResults);
        Assert.Single(objectResults);
        Assert.Equal(oleResults[0].Format["progId"], objectResults[0].Format["progId"]);
    }

    /// <summary>The OLE node exposes its object type and a width expressed in centimetres.</summary>
    [Fact]
    public void Query_Ole_ExtractsDimensions()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateOleObjectRun("Visio.Drawing.11", "385.45pt", "397.75pt")));
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("ole");

        Assert.Single(results);
        Assert.Equal("ole", results[0].Format["objectType"]);
        // 385.45pt * 2.54/72 = ~13.6cm
        var width = results[0].Format["width"]?.ToString();
        Assert.NotNull(width);
        Assert.EndsWith("cm", width);
    }

    /// <summary>The outline JSON counts drawings and OLE objects separately.</summary>
    [Fact]
    public void View_Outline_IncludesOleCount()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(CreateInlineImageRun(doc.MainDocumentPart!, 1)));
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
            body.AppendChild(new Paragraph(CreateOleObjectRun()));
        });

        using var handler = new WordHandler(path, false);
        var json = handler.ViewAsOutlineJson();

        Assert.Equal(1, (int)json["images"]!);
        Assert.Equal(2, (int)json["oleObjects"]!);
    }

    /// <summary>The "oleObjects" field is omitted when the document has no OLE objects.</summary>
    [Fact]
    public void View_Outline_NoOleField_WhenZero()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            body.AppendChild(new Paragraph(new Run(new Text("Hello"))));
        });

        using var handler = new WordHandler(path, false);
        var json = handler.ViewAsOutlineJson();

        Assert.Null(json["oleObjects"]);
    }

    /// <summary>A page-relative anchor with no wrapping and BehindDoc set reports behindText = true.</summary>
    [Fact]
    public void Query_Picture_WrapNone_BehindText()
    {
        var path = CreateTestDocx(doc =>
        {
            var body = doc.MainDocumentPart!.Document!.Body!;
            // Create anchor with WrapNone and BehindDoc=true
            var relId = AddMinimalPngPart(doc.MainDocumentPart!);
            long cx = 2160000, cy = 1440000;
            var anchor = new DW.Anchor(
                new DW.SimplePosition { X = 0, Y = 0 },
                new DW.HorizontalPosition(new DW.PositionOffset("0")) { RelativeFrom = DW.HorizontalRelativePositionValues.Page },
                new DW.VerticalPosition(new DW.PositionOffset("0")) { RelativeFrom = DW.VerticalRelativePositionValues.Page },
                new DW.Extent { Cx = cx, Cy = cy },
                new DW.EffectExtent { LeftEdge = 0, TopEdge = 0, RightEdge = 0, BottomEdge = 0 },
                new DW.WrapNone(),
                new DW.DocProperties { Id = 1, Name = "bg" },
                new DW.NonVisualGraphicFrameDrawingProperties(new A.GraphicFrameLocks { NoChangeAspect = true }),
                CreatePictureGraphic(relId, 1, "bg", cx, cy))
            {
                BehindDoc = true, SimplePos = false, RelativeHeight = 1U,
                AllowOverlap = true, LayoutInCell = true, Locked = false
            };
            body.AppendChild(new Paragraph(new Run(new Drawing(anchor))));
        });

        using var handler = new WordHandler(path, false);
        var results = handler.Query("picture");

        Assert.Single(results);
        Assert.Equal("none", results[0].Format["wrap"]);
        Assert.Equal(true, results[0].Format["behindText"]);
        Assert.Equal("page", results[0].Format["hRelative"]);
        Assert.Equal("page", results[0].Format["vRelative"]);
    }
}