Split a Markdown document into chunks whose text length does not exceed maxChunkSize whenever possible. Lines must remain intact.
When a new chunk starts inside a Markdown header section, prepend the active header path to that chunk. For example, if the current content is under # A and ## B, the new chunk should start with those header lines before adding the next content lines.
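Return each chunk as a display string where the lines inside that chunk are joined by " | " (a pipe with one space on each side). A chunk's length is measured on this joined display string, including the separators and any prepended header lines.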
Constraints
Header lines are lines whose first non-space character is #. If a single non-header line plus its active headers is longer than maxChunkSize, keep that line in its own chunk with the active headers.
Example 1
Input:
markdown = "# A\nshort\nlonger line\n## B\nx\ny"
maxChunkSize = 20
Output:
["# A | short","# A | longer line","# A | ## B | x | y"]
Explanation:
The second chunk repeats the active # A header before continuing content under that section.
Example 2
Input:
markdown = "# Guide\nalpha\nbeta"
maxChunkSize = 30
Output:
["# Guide | alpha | beta"]
Explanation: The whole document fits within one chunk.
解法
扫描每一行:若是 header 行,更新活跃 header 栈(按 # 数判定层级,弹掉同级及更深的,再 push 当前);否则视为内容行。维护当前 chunk 的行列表 cur(初始为活跃 header 列表)。每加入新行前先计算"加入后 cur 行用 | 拼接的长度",若 > maxChunkSize 且 cur 中已有内容行,则把 cur 收尾、新 chunk 重新以活跃 header 起头。单行(含 header 前缀)若仍超长,仍保留为一个 chunk。时间 O(总字符),空间 O(总字符)。
from typing import List
def markdown_header_chunks(markdown: str, max_chunk_size: int) -> List[str]:
    """Split a Markdown document into display chunks that carry header context.

    Lines are never broken up. Each chunk is rendered as its lines joined by
    " | ", and the length compared against ``max_chunk_size`` is the length of
    that joined string. Whenever a new chunk is started, the currently active
    header path (e.g. "# A" then "## B") is placed at the front of it.

    A header line is a line whose first non-whitespace character is "#"; its
    level is the number of leading "#" characters. A new header replaces every
    active header of the same or a deeper level.

    Args:
        markdown: The document text, with lines separated by "\n".
        max_chunk_size: Target upper bound for the length of a rendered chunk.

    Returns:
        The rendered chunks in document order. An empty document yields an
        empty list. A chunk may exceed ``max_chunk_size`` when a single line
        together with its header prefix is already too long.
    """
    if not markdown:
        return []

    sep = " | "
    chunks: List[str] = []
    # Stack of (level, original header line) for the current section path.
    active: List[tuple] = []
    # Lines of the chunk being built, and the length of their joined form.
    cur: List[str] = []
    cur_len = 0

    for line in markdown.split("\n"):
        stripped = line.lstrip()
        is_header = stripped.startswith("#")
        if is_header:
            level = len(stripped) - len(stripped.lstrip("#"))
            while active and active[-1][0] >= level:
                active.pop()
            active.append((level, line))

        # Headers that would be repeated in front of `line` if a new chunk
        # started here. For a header line that is its ancestors only, because
        # the line itself is appended below.
        ancestors = active[:-1] if is_header else active

        # Only split when the chunk holds more than that repeated prefix;
        # otherwise flushing would emit a chunk that is then repeated verbatim
        # at the start of the next one.
        has_content = len(cur) > len(ancestors)
        if has_content and cur_len + len(sep) + len(line) > max_chunk_size:
            chunks.append(sep.join(cur))
            cur = [text for _, text in ancestors]
            cur_len = len(sep.join(cur))

        # Keep a running length so each line is not re-joined on every step.
        cur_len = cur_len + len(sep) + len(line) if cur else len(line)
        cur.append(line)

    if cur:
        chunks.append(sep.join(cur))
    return chunks
import java.util.*;

/**
 * Splits a Markdown document into display chunks, repeating the active header
 * path at the start of every chunk that begins inside a header section.
 */
class Solution {
    /** Text placed between the lines of a rendered chunk. */
    private static final String SEPARATOR = " | ";

    /**
     * Breaks {@code markdown} into chunks whose rendered length stays within
     * {@code maxChunkSize} whenever that is possible without splitting a line.
     *
     * @param markdown     document text with lines separated by a newline; may be null
     * @param maxChunkSize target upper bound on the rendered length of a chunk
     * @return rendered chunks in document order; empty for null or empty input
     */
    public List<String> markdownHeaderChunks(String markdown, int maxChunkSize) {
        List<String> chunks = new ArrayList<>();
        if (markdown == null || markdown.isEmpty()) {
            return chunks;
        }

        // Parallel stacks describing the current header path.
        List<Integer> headerDepths = new ArrayList<>();
        List<String> headerPath = new ArrayList<>();
        // Lines collected for the chunk currently being built.
        List<String> pending = new ArrayList<>();

        // Limit -1 keeps trailing empty lines produced by a final newline.
        for (String line : markdown.split("\n", -1)) {
            int depth = headerDepth(line);
            boolean header = depth > 0;
            if (header) {
                // A new header closes every open header of the same or deeper level.
                int last = headerDepths.size() - 1;
                while (last >= 0 && headerDepths.get(last) >= depth) {
                    headerDepths.remove(last);
                    headerPath.remove(last);
                    last--;
                }
                headerDepths.add(depth);
                headerPath.add(line);
            }

            // Number of headers that would be repeated ahead of this line if a
            // new chunk started here (a header line does not repeat itself).
            int inherited = header ? headerPath.size() - 1 : headerPath.size();

            // Split only when the chunk holds more lines than that prefix.
            if (!pending.isEmpty() && pending.size() > inherited) {
                int projected = String.join(SEPARATOR, pending).length()
                        + SEPARATOR.length() + line.length();
                if (projected > maxChunkSize) {
                    chunks.add(String.join(SEPARATOR, pending));
                    pending = new ArrayList<>(headerPath.subList(0, inherited));
                }
            }
            pending.add(line);
        }

        if (!pending.isEmpty()) {
            chunks.add(String.join(SEPARATOR, pending));
        }
        return chunks;
    }

    /**
     * Counts the '#' characters that follow the leading whitespace of a line.
     * Returns 0 when the first non-whitespace character is not '#'.
     */
    private static int headerDepth(String line) {
        int pos = 0;
        while (pos < line.length() && isLeadingSpace(line.charAt(pos))) {
            pos++;
        }
        int depth = 0;
        while (pos + depth < line.length() && line.charAt(pos + depth) == '#') {
            depth++;
        }
        return depth;
    }

    /** Matches the characters covered by the default regex whitespace class. */
    private static boolean isLeadingSpace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\u000B' || c == '\f' || c == '\r';
    }
}
#include <bits/stdc++.h>
using namespace std;

// Splits a Markdown document into display chunks, repeating the active header
// path at the start of every chunk that begins inside a header section.
class Solution {
    // Renders a chunk: its lines joined by " | ".
    static string joinLines(const vector<string>& parts) {
        string joined;
        bool first = true;
        for (const string& part : parts) {
            if (!first) joined += " | ";
            joined += part;
            first = false;
        }
        return joined;
    }

public:
    // Breaks `markdown` into chunks whose rendered length stays within
    // `maxChunkSize` whenever that is possible without splitting a line.
    // Returns the rendered chunks in document order; empty input gives an
    // empty vector.
    vector<string> markdownHeaderChunks(string markdown, int maxChunkSize) {
        vector<string> chunks;
        if (markdown.empty()) return chunks;

        // Current header path as (level, original line) pairs.
        vector<pair<int, string>> headerStack;
        // Lines collected for the chunk currently being built.
        vector<string> pending;

        // Note: getline yields no final empty line when the text ends with
        // '\n', so a trailing newline does not add an empty line to a chunk.
        istringstream reader(markdown);
        string line;
        while (getline(reader, line, '\n')) {
            size_t start = 0;
            while (start < line.size() && isspace(static_cast<unsigned char>(line[start]))) {
                ++start;
            }
            const bool header = start < line.size() && line[start] == '#';
            if (header) {
                int depth = 0;
                while (start + depth < line.size() && line[start + depth] == '#') {
                    ++depth;
                }
                // A new header closes every open header of the same or deeper level.
                while (!headerStack.empty() && headerStack.back().first >= depth) {
                    headerStack.pop_back();
                }
                headerStack.emplace_back(depth, line);
            }

            // Number of headers that would be repeated ahead of this line if a
            // new chunk started here (a header line does not repeat itself).
            const int inherited = static_cast<int>(headerStack.size()) - (header ? 1 : 0);
            const bool hasBody = static_cast<int>(pending.size()) > inherited;

            // Split only when the chunk holds more lines than that prefix.
            if (!pending.empty() && hasBody) {
                const string candidate = joinLines(pending) + " | " + line;
                if (static_cast<int>(candidate.size()) > maxChunkSize) {
                    chunks.push_back(joinLines(pending));
                    pending.clear();
                    for (int i = 0; i < inherited; ++i) {
                        pending.push_back(headerStack[i].second);
                    }
                }
            }
            pending.push_back(line);
        }

        if (!pending.empty()) chunks.push_back(joinLines(pending));
        return chunks;
    }
};