import { ribbit, getWindow } from './setup';
import { InlineTokenizer, type InlineToken } from '../src/ts/tokenizer';
import { MarkdownSerializer, type SerializerTagDef } from '../src/ts/serializer';

// Set up DOM globals before any tests run
getWindow();

// Delimiter definitions handed to the tokenizer under test.
const boldDef = {
  delimiter: '**',
  htmlTag: 'strong',
  recursive: true,
  precedence: 40,
};

const italicDef = {
  delimiter: '*',
  htmlTag: 'em',
  recursive: true,
  precedence: 50,
};

const strikeDef = {
  delimiter: '~~',
  htmlTag: 'del',
  recursive: true,
  precedence: 45,
};

const codeDef = {
  delimiter: '`',
  htmlTag: 'code',
  recursive: false,
  precedence: 10,
};

const tokenizer = new InlineTokenizer([boldDef, italicDef, strikeDef, codeDef]);

/** Returns the `role` of every token, in order, for compact sequence assertions. */
function roles(tokens: InlineToken[]): string[] {
  return tokens.map(token => token.role);
}

/** Returns the `value` of every token, in order, for compact sequence assertions. */
function values(tokens: InlineToken[]): string[] {
  return tokens.map(token => token.value);
}

describe('InlineTokenizer', () => {
  describe('plain text', () => {
    it('produces a single text token', () => {
      const tokens = tokenizer.tokenize('hello world');
      expect(roles(tokens)).toEqual(['text']);
      expect(values(tokens)).toEqual(['hello world']);
    });
  });

  describe('bold', () => {
    it('tokenizes **bold**', () => {
      const tokens = tokenizer.tokenize('**bold**');
      expect(roles(tokens)).toEqual(['open', 'text', 'close']);
      expect(tokens[0].delimiter).toBe('**');
      expect(tokens[1].value).toBe('bold');
    });

    it('tokenizes text **bold** text', () => {
      const tokens = tokenizer.tokenize('hello **bold** end');
      expect(roles(tokens)).toEqual(['text', 'open', 'text', 'close', 'text']);
    });
  });

  describe('italic', () => {
    it('tokenizes *italic*', () => {
      const tokens = tokenizer.tokenize('*italic*');
      expect(roles(tokens)).toEqual(['open', 'text', 'close']);
      expect(tokens[0].delimiter).toBe('*');
    });
  });

  describe('strikethrough', () => {
    it('tokenizes ~~struck~~', () => {
      const tokens = tokenizer.tokenize('~~struck~~');
      expect(roles(tokens)).toEqual(['open', 'text', 'close']);
      expect(tokens[0].delimiter).toBe('~~');
    });
  });

  describe('code spans', () => {
    it('tokenizes `code`', () => {
      const tokens = tokenizer.tokenize('`code`');
      expect(roles(tokens)).toEqual(['code']);
      expect(tokens[0].content).toBe('code');
    });

    it('does not parse delimiters inside code', () => {
      const tokens = tokenizer.tokenize('`**not bold**`');
      expect(roles(tokens)).toEqual(['code']);
      expect(tokens[0].content).toBe('**not bold**');
    });
  });

  describe('backslash escapes', () => {
    it('\\* becomes literal *', () => {
      const tokens = tokenizer.tokenize('\\*hello');
      expect(roles(tokens)).toEqual(['text']);
      expect(tokens[0].value).toBe('*hello');
    });

    it('\\\\ becomes literal \\', () => {
      const tokens = tokenizer.tokenize('\\\\');
      expect(roles(tokens)).toEqual(['text']);
      expect(tokens[0].value).toBe('\\');
    });

    it('\\n at end of line is a hard break', () => {
      // Input is: hello, a backslash, a real newline, world.
      const tokens = tokenizer.tokenize('hello\\\nworld');
      expect(roles(tokens)).toEqual(['text', 'break', 'text']);
    });
  });

  describe('hard line breaks', () => {
    it('two trailing spaces before newline', () => {
      // Exactly two spaces precede the newline; this must differ from the
      // single-space case below or the two tests contradict each other.
      const tokens = tokenizer.tokenize('hello  \nworld');
      expect(roles(tokens)).toEqual(['text', 'break', 'text']);
    });

    it('single space does not break', () => {
      const tokens = tokenizer.tokenize('hello \nworld');
      const breakTokens = tokens.filter(token => token.role === 'break');
      expect(breakTokens.length).toBe(0);
    });
  });

  describe('entity resolution', () => {
    it('&amp; becomes &', () => {
      const tokens = tokenizer.tokenize('a &amp; b');
      expect(tokens[0].value).toBe('a & b');
    });

    it('&#123; becomes {', () => {
      // Decimal numeric character reference.
      const tokens = tokenizer.tokenize('&#123;');
      expect(tokens[0].value).toBe('{');
    });

    it('&#x7B; becomes {', () => {
      // Hexadecimal numeric character reference.
      const tokens = tokenizer.tokenize('&#x7B;');
      expect(tokens[0].value).toBe('{');
    });
  });

  describe('links', () => {
    it('tokenizes [text](url)', () => {
      const tokens = tokenizer.tokenize('[click](http://x)');
      expect(roles(tokens)).toEqual(['link']);
      expect(tokens[0].href).toBe('http://x');
      expect(tokens[0].value).toBe('click');
    });

    it('tokenizes [text](url "title")', () => {
      const tokens = tokenizer.tokenize('[click](http://x "My Title")');
      expect(tokens[0].title).toBe('My Title');
    });

    it('disallows [ in link text', () => {
      const tokens = tokenizer.tokenize('[outer [inner](b)](a)');
      // Should not match as a single link
      const linkTokens = tokens.filter(token => token.role === 'link');
      expect(linkTokens.length).toBeLessThanOrEqual(1);
    });
  });

  describe('autolinks', () => {
    it('tokenizes <https://example.com>', () => {
      const tokens = tokenizer.tokenize('<https://example.com>');
      expect(roles(tokens)).toEqual(['autolink']);
      expect(tokens[0].href).toBe('https://example.com');
    });

    it('tokenizes bare URL', () => {
      const tokens = tokenizer.tokenize('visit https://example.com today');
      expect(tokens.some(token => token.role === 'autolink')).toBe(true);
    });
  });

  describe('HTML passthrough', () => {
    it('tokenizes HTML tags', () => {
      // <span> is only a representative tag: the assertions just require the
      // opening and closing tags to come back verbatim as two html tokens.
      const tokens = tokenizer.tokenize('a <span>b</span> c');
      const htmlTokens = tokens.filter(token => token.role === 'html');
      expect(htmlTokens.length).toBe(2);
      expect(htmlTokens[0].value).toBe('<span>');
      expect(htmlTokens[1].value).toBe('</span>');
    });
  });

  describe('flanking rules', () => {
    it('mid-word * is not a delimiter', () => {
      const tokens = tokenizer.tokenize('2*3*4');
      expect(roles(tokens)).toEqual(['text']);
    });

    it('* at word boundary is a delimiter', () => {
      const tokens = tokenizer.tokenize('*hello*');
      expect(roles(tokens)).toEqual(['open', 'text', 'close']);
    });
  });

  describe('nested delimiters', () => {
    it('bold inside italic', () => {
      const tokens = tokenizer.tokenize('*hello **world***');
      const openTokens = tokens.filter(token => token.role === 'open');
      expect(openTokens.length).toBe(2);
    });
  });
});

describe('MarkdownSerializer', () => {
  // Explicit type arguments keep the heterogeneous entries checked against
  // SerializerTagDef and give the serialize callbacks typed parameters.
  const tagMap = new Map<string, SerializerTagDef>([
    ['STRONG', { delimiter: '**' }],
    ['B', { delimiter: '**' }],
    ['EM', { delimiter: '*' }],
    ['I', { delimiter: '*' }],
    ['DEL', { delimiter: '~~' }],
    ['CODE', {
      serialize: (element) => '`' + (element.textContent || '') + '`',
    }],
    ['A', {
      serialize: (element, children) => {
        const href = element.getAttribute('href') || '';
        const title = element.getAttribute('title');
        const titlePart = title ? ` "${title}"` : '';
        return '[' + children() + '](' + href + titlePart + ')';
      },
    }],
    ['BR', {
      // Two trailing spaces + newline is the Markdown hard line break.
      serialize: () => '  \n',
    }],
  ]);

  const delimiterChars = new Set(['*', '`', '~']);
  const serializer = new MarkdownSerializer(tagMap, delimiterChars);

  it('serializes plain text', () => {
    const div = document.createElement('div');
    div.textContent = 'hello world';
    expect(serializer.serialize(div)).toBe('hello world');
  });

  it('serializes bold', () => {
    const div = document.createElement('div');
    div.innerHTML = '<strong>bold</strong>';
    expect(serializer.serialize(div)).toBe('**bold**');
  });

  it('serializes italic', () => {
    const div = document.createElement('div');
    div.innerHTML = '<em>italic</em>';
    expect(serializer.serialize(div)).toBe('*italic*');
  });

  it('escapes * in text nodes', () => {
    const div = document.createElement('div');
    div.textContent = 'hello * world';
    expect(serializer.serialize(div)).toBe('hello \\* world');
  });

  it('escapes _ in text nodes', () => {
    const div = document.createElement('div');
    div.textContent = 'hello_world';
    expect(serializer.serialize(div)).toBe('hello\\_world');
  });

  it('escapes \\ in text nodes', () => {
    const div = document.createElement('div');
    div.textContent = 'back\\slash';
    expect(serializer.serialize(div)).toBe('back\\\\slash');
  });

  it('escapes < before letters', () => {
    const div = document.createElement('div');
    // Assigned via textContent, so "<b>" is literal text, not an element.
    div.textContent = 'a <b> c';
    expect(serializer.serialize(div)).toBe('a \\<b> c');
  });

  it('does not escape < before non-letters', () => {
    const div = document.createElement('div');
    div.textContent = '1 < 2';
    expect(serializer.serialize(div)).toBe('1 < 2');
  });

  it('does not escape * inside delimiters', () => {
    const div = document.createElement('div');
    div.innerHTML = '<strong>bold</strong>';
    const result = serializer.serialize(div);
    // The ** are delimiter tokens, not escaped
    expect(result).toBe('**bold**');
    expect(result).not.toContain('\\*');
  });

  it('escapes * in text adjacent to delimiters', () => {
    const div = document.createElement('div');
    div.innerHTML = '<strong>bold</strong> * text';
    const result = serializer.serialize(div);
    expect(result).toContain('\\*');
  });

  it('serializes link', () => {
    const div = document.createElement('div');
    div.innerHTML = '<a href="http://x">click</a>';
    expect(serializer.serialize(div)).toBe('[click](http://x)');
  });

  it('serializes link with title', () => {
    const div = document.createElement('div');
    div.innerHTML = '<a href="http://x" title="T">click</a>';
    expect(serializer.serialize(div)).toBe('[click](http://x "T")');
  });

  it('serializes code', () => {
    const div = document.createElement('div');
    div.innerHTML = '<code>x</code>';
    expect(serializer.serialize(div)).toBe('`x`');
  });

  it('serializes hard break', () => {
    const div = document.createElement('div');
    div.innerHTML = 'hello<br>world';
    expect(serializer.serialize(div)).toBe('hello  \nworld');
  });
});