chore: remove unused stripHtmlTags function to clean up text transformation utilities

This commit is contained in:
Sergey Kurdin 2025-06-23 15:12:23 -04:00
parent 34d304d1b8
commit 8eb891fbee

View File

@@ -193,60 +193,6 @@ const toJsonStringify = (text: string): string => {
}
}
/**
 * Converts an HTML string to plain text with normalized whitespace.
 *
 * A line break is inserted in front of every block-level opening tag (and
 * `<br>`) so that text from adjacent blocks does not run together once the
 * markup is removed. The markup is then parsed by the browser and its text
 * content is extracted.
 *
 * Whitespace in the result is normalized as follows:
 * - `\r\n` and lone `\r` become `\n`
 * - any run of spaces/tabs/newlines containing at least one newline becomes a
 *   single `\n` (so blank lines are removed)
 * - any remaining run of spaces/tabs becomes a single space
 * - leading and trailing whitespace is trimmed
 *
 * Requires a DOM (`document`); it cannot run in a non-browser environment.
 *
 * @param html - HTML markup to convert
 * @returns The text content of `html`, one line per block, without blank lines
 */
const stripHtmlTags = (html: string): string => {
  // Elements whose opening tag should start a new line in the text output
  const blockElements = [
    'p',
    'div',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'ul',
    'ol',
    'li',
    'blockquote',
    'pre',
    'address',
    'article',
    'aside',
    'footer',
    'header',
    'nav',
    'section',
    'table',
    'tr',
    'br',
  ]

  // Match only the `<tag` prefix, and require that the tag name ends there
  // (whitespace, `/` or `>` follows). Without the lookahead `<p` would also
  // match `<pre>`, `<progress>`, `<param>`; `<li` would match `<link>`; etc.
  const blockOpenRegex = new RegExp(`<(?:${blockElements.join('|')})(?=[\\s/>])`, 'gi')

  // Insert the newline in front of the tag and leave the tag itself untouched
  // (`$&` is the matched text), so attributes reach the parser intact.
  const processedHtml = html.replace(blockOpenRegex, '\n$&')

  // Parse inside a <template>: its content lives in an inert document, so
  // scripts and inline event handlers (e.g. <img onerror>) in untrusted
  // markup are not executed and resources are not fetched.
  const template = document.createElement('template')
  template.innerHTML = processedHtml

  // textContent concatenates all text nodes, dropping every tag
  const text = template.content.textContent ?? ''

  return text
    .replace(/\r\n?/g, '\n') // Normalize Windows and old Mac line endings to Unix
    .replace(/[ \t]*\n[ \t\n]*/g, '\n') // Collapse whitespace runs containing a newline to one newline
    .replace(/[ \t]+/g, ' ') // Collapse remaining runs of spaces/tabs to a single space
    .trim() // Remove leading and trailing whitespace
}
// Format Converter subcategories - organized by source format
const formatConverterSubcategories = [
{