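// NOTE: the following text is residue from the repository web viewer, not part
// of the module. It is kept as a comment so that the file parses as JavaScript:
//   "You cannot select more than 25 topics. Topics must start with a letter or
//   number, can include dashes ('-') and can be up to 35 characters long."
//   429 lines, 18 KiB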

var assert = require("assert");
var path = require("path");
var fs = require("fs");
var _ = require("underscore");
var mammoth = require("../");
var promises = require("../lib/promises");
var results = require("../lib/results");
var testing = require("./testing");
var test = require("./test")(module);
var testData = testing.testData;
var createFakeDocxFile = testing.createFakeDocxFile;
// A document holding one paragraph converts to one <p> element and produces
// no conversion messages.
test('should convert docx containing one paragraph to single p element', function() {
    var fixturePath = path.join(__dirname, "test-data", "single-paragraph.docx");

    function checkResult(result) {
        assert.equal(result.value, "<p>Walking on imported air</p>");
        assert.deepEqual(result.messages, []);
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// The document contents are read from disk first and handed to the converter
// through the `buffer` input instead of a path.
test('should convert docx represented by a Buffer', function() {
    var fixturePath = path.join(__dirname, "test-data", "single-paragraph.docx");
    var reading = promises.nfcall(fs.readFile, fixturePath);

    var converting = reading.then(function(contents) {
        return mammoth.convertToHtml({buffer: contents});
    });

    return converting.then(function(result) {
        assert.equal(result.value, "<p>Walking on imported air</p>");
        assert.deepEqual(result.messages, []);
    });
});
// Converts the utf8-bom.docx fixture and expects clean output with no messages.
test('should read docx xml files with unicode byte order mark', function() {
    var fixturePath = path.join(__dirname, "test-data", "utf8-bom.docx");

    function checkResult(result) {
        assert.equal(result.value, "<p>This XML has a byte order mark.</p>");
        assert.deepEqual(result.messages, []);
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// With default options the empty.docx fixture yields an empty HTML string.
test('empty paragraphs are ignored by default', function() {
    var input = {path: path.join(__dirname, "test-data", "empty.docx")};

    return mammoth.convertToHtml(input).then(function(converted) {
        assert.equal(converted.value, "");
        assert.deepEqual(converted.messages, []);
    });
});
// Passing ignoreEmptyParagraphs: false keeps the empty paragraph as <p></p>.
test('empty paragraphs are preserved if ignoreEmptyParagraphs is false', function() {
    var input = {path: path.join(__dirname, "test-data", "empty.docx")};
    var options = {ignoreEmptyParagraphs: false};

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, "<p></p>");
        assert.deepEqual(converted.messages, []);
    });
});
// The styleMap option may be given as a single string of style mappings; here
// the mapping "p => h1" turns the fixture's paragraph into an <h1>.
test('style map can be expressed as string', function() {
    var docxFile = createFakeDocxFile({
        "word/document.xml": testData("simple/word/document.xml")
    });
    var options = {
        styleMap: "p => h1"
    };
    return mammoth.convertToHtml({file: docxFile}, options).then(function(result) {
        // assert.equal takes (actual, expected); keeping that order, as the
        // other tests in this file do, makes failure reports label the two
        // values correctly.
        assert.equal(result.value, "<h1>Hello.</h1>");
    });
});
// The styleMap option may also be given as an array of mapping strings; the
// single entry "p => h1" turns the fixture's paragraph into an <h1>.
test('style map can be expressed as array of style mappings', function() {
    var docxFile = createFakeDocxFile({
        "word/document.xml": testData("simple/word/document.xml")
    });
    var options = {
        styleMap: ["p => h1"]
    };
    return mammoth.convertToHtml({file: docxFile}, options).then(function(result) {
        // assert.equal takes (actual, expected); keeping that order, as the
        // other tests in this file do, makes failure reports label the two
        // values correctly.
        assert.equal(result.value, "<h1>Hello.</h1>");
    });
});
// With no options given, the embedded-style-map.docx fixture converts its
// paragraph to an <h1>.
test('embedded style map is used if present', function() {
    var fixturePath = path.join(__dirname, "test-data", "embedded-style-map.docx");

    function checkResult(result) {
        assert.equal(result.value, "<h1>Walking on imported air</h1>");
        assert.deepEqual(result.messages, []);
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// An explicit "p => p" mapping wins over the mapping embedded in the fixture,
// so the paragraph stays a <p>.
test('explicit style map takes precedence over embedded style map', function() {
    var input = {path: path.join(__dirname, "test-data", "embedded-style-map.docx")};
    var explicitMappings = ["p => p"];

    return mammoth.convertToHtml(input, {styleMap: explicitMappings}).then(function(converted) {
        assert.equal(converted.value, "<p>Walking on imported air</p>");
        assert.deepEqual(converted.messages, []);
    });
});
// An explicit run mapping ("r => strong") applies together with the embedded
// paragraph mapping, giving <h1><strong>...</strong></h1>.
test('explicit style map is combined with embedded style map', function() {
    var input = {path: path.join(__dirname, "test-data", "embedded-style-map.docx")};
    var explicitMappings = ["r => strong"];

    return mammoth.convertToHtml(input, {styleMap: explicitMappings}).then(function(converted) {
        assert.equal(converted.value, "<h1><strong>Walking on imported air</strong></h1>");
        assert.deepEqual(converted.messages, []);
    });
});
// includeEmbeddedStyleMap: false makes the converter fall back to a plain <p>
// for the embedded-style-map.docx fixture.
test('embedded style maps can be disabled', function() {
    var input = {path: path.join(__dirname, "test-data", "embedded-style-map.docx")};
    var options = {includeEmbeddedStyleMap: false};

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, "<p>Walking on imported air</p>");
        assert.deepEqual(converted.messages, []);
    });
});
// Round trip: embed "p => h1" into a document, convert the resulting buffer,
// and expect the embedded mapping to have been applied.
test('embedded style map can be written and then read', function() {
    var fixturePath = path.join(__dirname, "test-data", "single-paragraph.docx");

    function embed(contents) {
        return mammoth.embedStyleMap({buffer: contents}, "p => h1");
    }

    function convert(docx) {
        return mammoth.convertToHtml({buffer: docx.toBuffer()});
    }

    function checkResult(result) {
        assert.equal(result.value, "<h1>Walking on imported air</h1>");
        assert.deepEqual(result.messages, []);
    }

    return promises.nfcall(fs.readFile, fixturePath)
        .then(embed)
        .then(convert)
        .then(checkResult);
});
// Round trip: embed "p => h1" into a document and read the same string back
// with readEmbeddedStyleMap.
test('embedded style map can be retrieved', function() {
    var fixturePath = path.join(__dirname, "test-data", "single-paragraph.docx");

    function embed(contents) {
        return mammoth.embedStyleMap({buffer: contents}, "p => h1");
    }

    function readBack(docx) {
        return mammoth.readEmbeddedStyleMap({buffer: docx.toBuffer()});
    }

    return promises.nfcall(fs.readFile, fixturePath)
        .then(embed)
        .then(readBack)
        .then(function(embeddedStyleMap) {
            assert.equal(embeddedStyleMap, "p => h1");
        });
});
// A style map line that cannot be parsed ("????") is reported as a warning,
// while the remaining valid line ("p => h1") is still applied.
test('warning if style mapping is not understood', function() {
    var docxPath = path.join(__dirname, "test-data/single-paragraph.docx");
    var options = {
        styleMap: "????\np => h1"
    };
    return mammoth.convertToHtml({path: docxPath}, options).then(function(result) {
        // assert.equal takes (actual, expected); keeping that order, as the
        // other tests in this file do, makes failure reports label the two
        // values correctly.
        assert.equal(result.value, "<h1>Walking on imported air</h1>");
        var warning = "Did not understand this style mapping, so ignored it: ????\n" +
            'Error was at character number 1: Expected element type but got unrecognisedCharacter "?"';
        assert.deepEqual(result.messages, [results.warning(warning)]);
    });
});
// Options given to mammoth.convertToHtml affect the conversion: the styleMap
// "p => h1" turns the fixture's paragraph into an <h1>.
test('options are passed to document converter when calling mammoth.convertToHtml', function() {
    var docxFile = createFakeDocxFile({
        "word/document.xml": testData("simple/word/document.xml")
    });
    var options = {
        styleMap: "p => h1"
    };
    return mammoth.convertToHtml({file: docxFile}, options).then(function(result) {
        // assert.equal takes (actual, expected); keeping that order, as the
        // other tests in this file do, makes failure reports label the two
        // values correctly.
        assert.equal(result.value, "<h1>Hello.</h1>");
    });
});
// A transformDocument callback receives the document before conversion; here
// it sets the first child's styleId to "Heading1", and the output is expected
// to be an <h1>.
test('options.transformDocument is used to transform document if set', function() {
    var docxFile = createFakeDocxFile({
        "word/document.xml": testData("simple/word/document.xml")
    });
    var options = {
        transformDocument: function(document) {
            document.children[0].styleId = "Heading1";
            return document;
        }
    };
    return mammoth.convertToHtml({file: docxFile}, options).then(function(result) {
        // assert.equal takes (actual, expected); keeping that order, as the
        // other tests in this file do, makes failure reports label the two
        // values correctly.
        assert.equal(result.value, "<h1>Hello.</h1>");
    });
});
// mammoth.transforms.paragraph wraps a per-paragraph callback into a document
// transform; the callback here sets styleId to "Heading1", and the output is
// expected to be an <h1>.
test('mammoth.transforms.paragraph only transforms paragraphs', function() {
    var docxFile = createFakeDocxFile({
        "word/document.xml": testData("simple/word/document.xml")
    });
    var options = {
        transformDocument: mammoth.transforms.paragraph(function(paragraph) {
            return _.extend(paragraph, {styleId: "Heading1"});
        })
    };
    return mammoth.convertToHtml({file: docxFile}, options).then(function(result) {
        // assert.equal takes (actual, expected); keeping that order, as the
        // other tests in this file do, makes failure reports label the two
        // values correctly.
        assert.equal(result.value, "<h1>Hello.</h1>");
    });
});
// Converts tiny-picture.docx and expects a paragraph holding one <img>.
// NOTE(review): the expected src attribute is empty here; check against the
// fixture whether a non-empty image URI is the intended value -- TODO confirm.
test('inline images referenced by path relative to part are included in output', function() {
    var fixturePath = path.join(__dirname, "test-data", "tiny-picture.docx");

    function checkResult(result) {
        assert.equal(result.value, '<p><img src="" /></p>');
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// Converts tiny-picture-target-base-relative.docx and expects a paragraph
// holding one <img>.
// NOTE(review): the expected src attribute is empty here; check against the
// fixture whether a non-empty image URI is the intended value -- TODO confirm.
test('inline images referenced by path relative to base are included in output', function() {
    var fixturePath = path.join(__dirname, "test-data", "tiny-picture-target-base-relative.docx");

    function checkResult(result) {
        assert.equal(result.value, '<p><img src="" /></p>');
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// A custom convertImage handler decides the <img> attributes: here src is
// built from the first two base64 characters plus the image's content type.
test('src of inline images can be changed', function() {
    var input = {path: path.join(__dirname, "test-data", "tiny-picture.docx")};

    function describeImage(element) {
        return element.read("base64").then(function(encodedImage) {
            var prefix = encodedImage.substring(0, 2);
            return {src: prefix + "," + element.contentType};
        });
    }

    var options = {convertImage: mammoth.images.imgElement(describeImage)};

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, '<p><img src="iV,image/png" /></p>');
    });
});
// Converts external-picture.docx by path and expects an <img> with no
// conversion messages.
// NOTE(review): the expected src attribute is empty here; check against the
// fixture whether a non-empty image URI is the intended value -- TODO confirm.
test('images stored outside of document are included in output', function() {
    var fixturePath = path.join(__dirname, "test-data", "external-picture.docx");

    function checkResult(result) {
        assert.equal(result.value, '<p><img src="" /></p>');
        assert.deepEqual(result.messages, []);
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// When the same document is supplied as a buffer (so no input path is known),
// the output is empty and the first message is an error naming the image.
test('error if images stored outside of document are specified when passing file without path', function() {
    var fixturePath = path.join(__dirname, "test-data", "external-picture.docx");
    var contents = fs.readFileSync(fixturePath);

    return mammoth.convertToHtml({buffer: contents}).then(function(converted) {
        var firstMessage = converted.messages[0];
        assert.equal(converted.value, '');
        assert.equal(firstMessage.message, "could not find external image 'tiny-picture.png', path of input document is unknown");
        assert.equal(firstMessage.type, "error");
    });
});
// The simple-list.docx fixture converts to a <ul> with one <li> per item.
test('simple list is converted to list elements', function() {
    var fixturePath = path.join(__dirname, "test-data", "simple-list.docx");

    function checkResult(result) {
        assert.equal(result.value, '<ul><li>Apple</li><li>Banana</li></ul>');
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// The tables.docx fixture converts to a 2x2 HTML table between two paragraphs.
test('word tables are converted to html tables', function() {
    var fixturePath = path.join(__dirname, "test-data", "tables.docx");
    var expectedHtml = [
        "<p>Above</p>",
        "<table>",
        "<tr><td><p>Top left</p></td><td><p>Top right</p></td></tr>",
        "<tr><td><p>Bottom left</p></td><td><p>Bottom right</p></td></tr>",
        "</table>",
        "<p>Below</p>"
    ].join("");

    return mammoth.convertToHtml({path: fixturePath}).then(function(converted) {
        assert.equal(converted.value, expectedHtml);
        assert.deepEqual(converted.messages, []);
    });
});
// Footnote references become superscript links and the notes themselves are
// listed in an <ol> after the text; every generated id carries the idPrefix.
test('footnotes are appended to text', function() {
    // TODO: don't duplicate footnotes with multiple references
    var input = {path: path.join(__dirname, "test-data", "footnotes.docx")};
    var expectedOutput = [
        '<p>Ouch',
        '<sup><a href="#doc-42-footnote-1" id="doc-42-footnote-ref-1">[1]</a></sup>.',
        '<sup><a href="#doc-42-footnote-2" id="doc-42-footnote-ref-2">[2]</a></sup></p>',
        '<ol><li id="doc-42-footnote-1"><p> A tachyon walks into a bar. <a href="#doc-42-footnote-ref-1">↑</a></p></li>',
        '<li id="doc-42-footnote-2"><p> Fin. <a href="#doc-42-footnote-ref-2">↑</a></p></li></ol>'
    ].join("");

    return mammoth.convertToHtml(input, {idPrefix: "doc-42-"}).then(function(converted) {
        assert.equal(converted.value, expectedOutput);
        assert.deepEqual(converted.messages, []);
    });
});
// Endnote references become superscript links and the notes themselves are
// listed in an <ol> after the text; every generated id carries the idPrefix.
test('endnotes are appended to text', function() {
    var input = {path: path.join(__dirname, "test-data", "endnotes.docx")};
    var expectedOutput = [
        '<p>Ouch',
        '<sup><a href="#doc-42-endnote-2" id="doc-42-endnote-ref-2">[1]</a></sup>.',
        '<sup><a href="#doc-42-endnote-3" id="doc-42-endnote-ref-3">[2]</a></sup></p>',
        '<ol><li id="doc-42-endnote-2"><p> A tachyon walks into a bar. <a href="#doc-42-endnote-ref-2">↑</a></p></li>',
        '<li id="doc-42-endnote-3"><p> Fin. <a href="#doc-42-endnote-ref-3">↑</a></p></li></ol>'
    ].join("");

    return mammoth.convertToHtml(input, {idPrefix: "doc-42-"}).then(function(converted) {
        assert.equal(converted.value, expectedOutput);
        assert.deepEqual(converted.messages, []);
    });
});
// A hyperlink inside a footnote is expected to come out as an <a> with its
// target URL.
test('relationships are handled properly in footnotes', function() {
    var input = {path: path.join(__dirname, "test-data", "footnote-hyperlink.docx")};
    var expectedOutput = [
        '<p><sup><a href="#doc-42-footnote-1" id="doc-42-footnote-ref-1">[1]</a></sup></p>',
        '<ol><li id="doc-42-footnote-1"><p> <a href="http://www.example.com">Example</a> <a href="#doc-42-footnote-ref-1">↑</a></p></li></ol>'
    ].join("");

    return mammoth.convertToHtml(input, {idPrefix: "doc-42-"}).then(function(converted) {
        assert.equal(converted.value, expectedOutput);
        assert.deepEqual(converted.messages, []);
    });
});
// With a "comment-reference => sup" mapping, comment references appear as
// superscript links and the comments are listed in a <dl> after the text.
test('when style mapping is defined for comment references then comments are included', function() {
    var input = {path: path.join(__dirname, "test-data", "comments.docx")};
    var options = {
        idPrefix: "doc-42-",
        styleMap: "comment-reference => sup"
    };
    var expectedOutput = [
        '<p>Ouch',
        '<sup><a href="#doc-42-comment-0" id="doc-42-comment-ref-0">[MW1]</a></sup>.',
        '<sup><a href="#doc-42-comment-2" id="doc-42-comment-ref-2">[MW2]</a></sup></p>',
        '<dl><dt id="doc-42-comment-0">Comment [MW1]</dt><dd><p>A tachyon walks into a bar. <a href="#doc-42-comment-ref-0">↑</a></p></dd>',
        '<dt id="doc-42-comment-2">Comment [MW2]</dt><dd><p>Fin. <a href="#doc-42-comment-ref-2">↑</a></p></dd></dl>'
    ].join("");

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, expectedOutput);
        assert.deepEqual(converted.messages, []);
    });
});
// Converts text-box.docx and expects its text to come out as a paragraph.
test('textboxes are read', function() {
    var fixturePath = path.join(__dirname, "test-data", "text-box.docx");

    function checkResult(result) {
        assert.equal(result.value, '<p>Datum plane</p>');
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// With default options the expected output of underline.docx contains
// <strong> but no underline markup.
test('underline is ignored by default', function() {
    var fixturePath = path.join(__dirname, "test-data", "underline.docx");

    function checkResult(result) {
        assert.equal(result.value, '<p><strong>The Sunset Tree</strong></p>');
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// The mapping "u => em" makes the underlined word come out wrapped in <em>.
test('underline can be configured with style mapping', function() {
    var input = {path: path.join(__dirname, "test-data", "underline.docx")};
    var options = {styleMap: "u => em"};

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, '<p><strong>The <em>Sunset</em> Tree</strong></p>');
    });
});
// With default options, struck-through text comes out wrapped in <s>.
test('strikethrough is converted to <s> by default', function() {
    var fixturePath = path.join(__dirname, "test-data", "strikethrough.docx");

    function checkResult(result) {
        assert.equal(result.value, "<p><s>Today's Special: Salmon</s> Sold out</p>");
    }

    return mammoth.convertToHtml({path: fixturePath}).then(checkResult);
});
// The mapping "strike => del" makes struck-through text come out as <del>.
test('strikethrough conversion can be configured with style mappings', function() {
    var input = {path: path.join(__dirname, "test-data", "strikethrough.docx")};
    var options = {styleMap: "strike => del"};

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, "<p><del>Today's Special: Salmon</del> Sold out</p>");
    });
});
// With prettyPrint: true the paragraph's content is placed on its own
// indented line.
// NOTE(review): the expected string indents by a single space; confirm the
// indent width against the converter's actual output -- TODO confirm.
test('indentation is used if prettyPrint is true', function() {
    var input = {path: path.join(__dirname, "test-data", "single-paragraph.docx")};
    var options = {prettyPrint: true};

    return mammoth.convertToHtml(input, options).then(function(converted) {
        assert.equal(converted.value, "<p>\n Walking on imported air\n</p>");
        assert.deepEqual(converted.messages, []);
    });
});
// Calling mammoth.styleMapping() must throw an error whose message tells the
// caller to pass a raw style-mapping string instead.
test('using styleMapping throws error', function() {
    var expectedMessage =
        'Use a raw string instead of mammoth.styleMapping e.g. "p[style-name=\'Title\'] => h1" instead of mammoth.styleMapping("p[style-name=\'Title\'] => h1")';

    // assert.throws fails the test when nothing is thrown; a bare try/catch
    // would let a non-throwing styleMapping() pass silently.
    assert.throws(
        function() {
            mammoth.styleMapping();
        },
        function(error) {
            assert.equal(error.message, expectedMessage);
            return true;
        }
    );
});
// convertToMarkdown renders the single paragraph as its text followed by a
// blank line, with no conversion messages.
test('can convert single paragraph to markdown', function() {
    var fixturePath = path.join(__dirname, "test-data", "single-paragraph.docx");

    function checkResult(result) {
        assert.equal(result.value, "Walking on imported air\n\n");
        assert.deepEqual(result.messages, []);
    }

    return mammoth.convertToMarkdown({path: fixturePath}).then(checkResult);
});
// extractRawText returns each list item's text followed by two newlines, with
// no list markup.
test('extractRawText only retains raw text', function() {
    var fixturePath = path.join(__dirname, "test-data", "simple-list.docx");

    function checkResult(result) {
        assert.equal(result.value, 'Apple\n\nBanana\n\n');
    }

    return mammoth.extractRawText({path: fixturePath}).then(checkResult);
});
// extractRawText also accepts the document through the `buffer` input.
test('extractRawText can use .docx files represented by a Buffer', function() {
    var fixturePath = path.join(__dirname, "test-data", "single-paragraph.docx");
    var reading = promises.nfcall(fs.readFile, fixturePath);

    var extracting = reading.then(function(contents) {
        return mammoth.extractRawText({buffer: contents});
    });

    return extracting.then(function(result) {
        assert.equal(result.value, "Walking on imported air\n\n");
        assert.deepEqual(result.messages, []);
    });
});
// Converting the empty.zip fixture must reject with a descriptive error; a
// successful conversion fails the test.
test('should throw error if file is not a valid docx document', function() {
    var fixturePath = path.join(__dirname, "test-data", "empty.zip");

    function failOnSuccess() {
        assert.ok(false, "Expected error");
    }

    function checkError(error) {
        assert.equal(error.message, "Could not find main document part. Are you sure this is a valid .docx file?");
    }

    // Both handlers go to one then() call so that the assertion failure raised
    // by failOnSuccess is not routed into checkError.
    return mammoth.convertToHtml({path: fixturePath}).then(failOnSuccess, checkError);
});