Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update: update code block to ```python {.marimo} #3387

Merged
merged 9 commits into from
Jan 31, 2025
179 changes: 155 additions & 24 deletions marimo/_cli/convert/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,87 @@
SuperFencesCodeExtension,
)

from marimo import _loggers
from marimo._ast import codegen
from marimo._ast.app import App, InternalApp, _AppConfig
from marimo._ast.cell import Cell, CellConfig
from marimo._ast.compiler import compile_cell
from marimo._ast.names import DEFAULT_CELL_NAME
from marimo._convert.utils import markdown_to_marimo
from marimo._dependencies.dependencies import DependencyManager

# Module-level logger; used below to warn about legacy/unsupported fences.
LOGGER = _loggers.marimo_logger()

# Element tag names used for the intermediate parse tree. MARIMO_CODE marks
# code-block elements; MARIMO_MD presumably marks markdown text elements
# (NOTE(review): confirm against the use sites).
MARIMO_MD = "marimo-md"
MARIMO_CODE = "marimo-code"

# Closed set of string keys: "marimo" or "marimo-app".
ConvertKeys = Union[Literal["marimo"], Literal["marimo-app"]]

# Regex captures loose yaml for frontmatter
# Should match the following:
# ---
# title: "Title"
# whatever
# ---
# Group 1 is the text between the two `---` delimiter lines; DOTALL lets `.`
# span newlines so the body may cover several lines.
YAML_FRONT_MATTER_REGEX = re.compile(
r"^---\s*\n(.*?\n?)(?:---)\s*\n", re.UNICODE | re.DOTALL
)

# From pymdownx.superfences 10.11.2
# Vendored copy of the fence-opening pattern so that lines such as
# ```lang {.marimo} can still be recognised when the installed
# RE_NESTED_FENCE_START does not match them.
# Named groups: fence, lang, attrs, options, quot.
COMPAT_RE_NESTED_FENCE_START = re.compile(
r"""(?x)
(?P<fence>~{3,}|`{3,})
(?:[ \t]*\.?(?P<lang>[\w#.+-]+)(?=[\t ]|$))? # Language
(?:
[ \t]*(\{(?P<attrs>[^\n]*)\}) | # Optional attributes or
(?P<options>
(?:
(?:[ \t]*[a-zA-Z][a-zA-Z0-9_]*(?:=(?P<quot>"|').*?(?P=quot))?)(?=[\t ]|$) # Options
)+
)
)?[ \t]*$
"""
)


def backwards_compatible_sanitization(line: str) -> str:
    """Return ``line`` exactly as given.

    The function is currently an identity pass-through: no sanitization is
    applied to the input.
    """
    unchanged = line
    return unchanged


def extract_attribs(
    line: str, fence_start: Optional[re.Match] = None
) -> dict[str, str]:
    """Collect ``key="value"`` attributes from a code fence header.

    Headers are expected to look like::

        python {.marimo disabled="true"}

    Args:
        line: The fence header line. Only parsed when ``fence_start`` is
            not supplied.
        fence_start: An existing match exposing an ``attrs`` group. When
            ``None``, ``line`` is matched with ``RE_NESTED_FENCE_START``.

    Returns:
        A mapping of attribute names to their double-quoted values; empty
        when the header does not match or carries no attributes.
    """
    header_match = fence_start
    if header_match is None:
        header_match = RE_NESTED_FENCE_START.match(line)
    if not header_match:
        return {}

    # "attrs" is a bit of a misnomer: it holds everything inside the braces,
    # e.g. `.python.marimo disabled="true"`.
    brace_content = header_match.group("attrs")
    if not brace_content:
        return {}

    pairs = re.findall(r'(\w+)="([^"]*)"', brace_content)
    return dict(pairs)


def _is_code_tag(text: str) -> bool:
head = text.split("\n")[0].strip()
return bool(re.search(r"\{.*python.*\}", head))
legacy_format = bool(re.search(r"\{.*python.*\}", head))
legacy_format |= bool(re.search(r"\{.*sql.*\}", head))
if DependencyManager.new_superfences.has_required_version(quiet=True):
supported_format = bool(re.search(r".*\{.*marimo.*\}", head))
return legacy_format or supported_format
return legacy_format


def _get_language(text: str) -> str:
    """Return the language named on a fenced block's header line.

    Args:
        text: The full fenced block, header line first.

    Returns:
        The ``lang`` group matched by ``RE_NESTED_FENCE_START`` on the first
        line, or ``"python"`` when the header does not match or names no
        language.
    """
    # The header is the FIRST line; the last line is the closing fence and
    # never carries a language.
    header = text.split("\n")[0]
    match = RE_NESTED_FENCE_START.match(header)
    if match:
        lang = match.group("lang")
        # `lang` is an optional group; fall back rather than return None.
        if lang:
            return str(lang)
    return "python"


def formatted_code_block(
Expand All @@ -46,14 +111,24 @@ def formatted_code_block(
"""Wraps code in a fenced code block with marimo attributes."""
if attributes is None:
attributes = {}
language = attributes.pop("language", "python")
attribute_str = " ".join(
[""] + [f'{key}="{value}"' for key, value in attributes.items()]
)
guard = "```"
while guard in code:
guard += "`"
if DependencyManager.new_superfences.has_required_version(quiet=True):
return "\n".join(
[
f"""{guard}{language} {{.marimo{attribute_str}}}""",
code,
guard,
"",
]
)
return "\n".join(
[f"""{guard}{{.python.marimo{attribute_str}}}""", code, guard, ""]
[f"""{guard}{{.{language}.marimo{attribute_str}}}""", code, guard, ""]
)


Expand Down Expand Up @@ -82,6 +157,8 @@ def get_source_from_tag(tag: Element) -> str:
if not (source and source.strip()):
return ""
source = markdown_to_marimo(source)
elif tag.attrib.get("language") == "sql":
dmadisetti marked this conversation as resolved.
Show resolved Hide resolved
return "#sql + " + source
else:
assert tag.tag == MARIMO_CODE, f"Unknown tag: {tag.tag}"
return source
Expand Down Expand Up @@ -220,6 +297,10 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
self.preprocessors.register(
FrontMatterPreprocessor(self), "frontmatter", 100
)
# Preprocess for backwards compatibility.
self.preprocessors.register(
MdCompatPreprocessor(self), "md-compat", 100
)
fences_ext = SuperFencesCodeExtension()
fences_ext.extendMarkdown(self)
# TODO: Consider adding the admonition extension, and integrating it
Expand Down Expand Up @@ -272,15 +353,6 @@ def __init__(self, md: MarimoParser):
super().__init__(md)
self.md = md
self.md.meta = {}
# Regex captures loose yaml for frontmatter
# Should match the following:
# ---
# title: "Title"
# whatever
# ---
self.yaml_front_matter_regex = re.compile(
r"^---\s*\n(.*?\n?)(?:---)\s*\n", re.UNICODE | re.DOTALL
)

def run(self, lines: list[str]) -> list[str]:
import yaml
Expand All @@ -295,7 +367,7 @@ def run(self, lines: list[str]) -> list[str]:
return lines

doc = "\n".join(lines)
result = self.yaml_front_matter_regex.match(doc)
result = YAML_FRONT_MATTER_REGEX.match(doc)

if result:
yaml_content = result.group(1)
Expand All @@ -310,6 +382,71 @@ def run(self, lines: list[str]) -> list[str]:
return doc.split("\n")


class MdCompatPreprocessor(Preprocessor):
    """Preprocessor bridging new-style code fences and old pymdownx releases.

    When the installed pymdownx does not satisfy the ``new_superfences``
    version requirement, fence headers written in the new format::

        ```lang {.marimo}

    are rewritten into the legacy format::

        ```{.lang.marimo}

    so that the older superfences implementation can handle them. When the
    requirement is satisfied, lines pass through untouched.
    """

    def run(self, lines: list[str]) -> list[str]:
        """Return ``lines`` with unsupported fence headers rewritten.

        Args:
            lines: The document, one entry per line.

        Returns:
            A new list of lines. At most one warning of each kind is logged
            per call, after all lines have been processed.
        """
        # TODO: Remove with some minor release.
        # The version check does not depend on the line, so do it once.
        if DependencyManager.new_superfences.has_required_version(quiet=True):
            return list(lines)

        response: list[str] = []
        old_to_old = False
        old_to_new = False
        for line in lines:
            # If old format, old regex, pass through but warn.
            if RE_NESTED_FENCE_START.match(line):
                old_to_old = True
                response.append(line)
                continue

            # There's a chance that the new format is used with the old
            # regex, since pymdownx.superfences < 10.11 will not match
            #   ```lang {.marimo}
            # Put it into the old format, so superfences can handle it.
            new_match = COMPAT_RE_NESTED_FENCE_START.match(line)
            lang = new_match.group("lang") if new_match else None
            if not lang:
                # Not a fence header, or one without a language to move
                # into the braces: leave the line alone.
                response.append(line)
                continue

            old_to_new = True
            attribute_str = "".join(
                f' {key}="{value}"'
                for key, value in extract_attribs(line, new_match).items()
            )
            fence = new_match.group("fence")
            response.append(f"{fence}{{.{lang}.marimo{attribute_str}}}")

        if old_to_old:
            LOGGER.warning(
                "Legacy format used for code block. Please update pymdownx to >= 10.11"
            )

        if old_to_new:
            LOGGER.warning(
                "Unsupported code fence, applying heuristic. Please update pymdownx to >= 10.11"
            )
        return response


class SanitizeProcessor(Preprocessor):
"""Prevent unintended executable code block injection.

Expand Down Expand Up @@ -414,18 +551,12 @@ def add_paragraph() -> None:
code_block = SubElement(parent, MARIMO_CODE)
block_lines = code.split("\n")
code_block.text = "\n".join(block_lines[1:-1])
# Extract attributes from the code block.
# Blocks are expected to be like this:
# {.python.marimo disabled="true"}
fence_start = RE_NESTED_FENCE_START.match(block_lines[0])
if fence_start:
# attrs is a bit of a misnomer, matches
# .python.marimo disabled="true"
inner = fence_start.group("attrs")
if inner:
code_block.attrib = dict(
re.findall(r'(\w+)="([^"]*)"', inner)
)
code_block.set("language", _get_language(code))

attribs = extract_attribs(block_lines[0])
if attribs:
code_block.attrib = attribs

add_paragraph()
# Flush to indicate all blocks have been processed.
blocks.clear()
Expand Down
36 changes: 29 additions & 7 deletions marimo/_dependencies/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Dependency:
min_version: str | None = None
max_version: str | None = None

def has(self) -> bool:
def has(self, quiet=False) -> bool:
"""Return True if the dependency is installed."""
try:
has_dep = importlib.util.find_spec(self.pkg) is not None
Expand All @@ -24,20 +24,32 @@ def has(self) -> bool:
# Could happen for nested imports (e.g. foo.bar)
return False

if self.min_version or self.max_version:
if not quiet and (self.min_version or self.max_version):
self.warn_if_mismatch_version(self.min_version, self.max_version)
return True

def has_at_version(
    self,
    *,
    min_version: str | None,
    max_version: str | None = None,
    quiet: bool = False,
) -> bool:
    """Return True if the dependency is installed within a version range.

    Args:
        min_version: Minimum version passed to ``_version_check`` as
            ``min_v``; ``None`` for no lower bound.
        max_version: Maximum version passed to ``_version_check`` as
            ``max_v``; ``None`` for no upper bound.
        quiet: Forwarded to both ``has`` and ``_version_check`` to suppress
            their version-mismatch output.

    Returns:
        False when the dependency is not installed; otherwise the result of
        ``_version_check`` for the installed version.
    """
    # Single installation check, honouring `quiet`.
    if not self.has(quiet=quiet):
        return False
    return _version_check(
        pkg=self.pkg,
        v=self.get_version(),
        min_v=min_version,
        max_v=max_version,
        quiet=quiet,
    )

def has_required_version(self, quiet=False) -> bool:
    """Check the dependency against its own declared version bounds.

    Delegates to ``has_at_version`` using this instance's ``min_version``
    and ``max_version``; ``quiet`` is forwarded unchanged.
    """
    bounds = {
        "min_version": self.min_version,
        "max_version": self.max_version,
    }
    return self.has_at_version(quiet=quiet, **bounds)

def imported(self) -> bool:
Expand Down Expand Up @@ -75,7 +87,10 @@ def require_at_version(
)

def get_version(self) -> str:
    """Return the installed version string of this dependency.

    The distribution metadata for ``self.pkg`` is consulted first. If no
    distribution is registered under that name, the module is imported and
    its ``__version__`` attribute is returned instead.
    """
    try:
        return importlib.metadata.version(self.pkg)
    except importlib.metadata.PackageNotFoundError:
        # No distribution is registered under the import name; fall back to
        # the module's own version attribute.
        return __import__(self.pkg).__version__

def warn_if_mismatch_version(
self,
Expand Down Expand Up @@ -111,6 +126,7 @@ def _version_check(
min_v: str | None = None,
max_v: str | None = None,
raise_error: bool = False,
quiet: bool = False,
) -> bool:
if min_v is None and max_v is None:
return True
Expand All @@ -125,14 +141,16 @@ def _version_check(
msg = f"Mismatched version of {pkg}: expected >={min_v}, got {v}"
if raise_error:
raise RuntimeError(msg)
sys.stderr.write(f"{msg}. Some features may not work correctly.")
if not quiet:
sys.stderr.write(f"{msg}. Some features may not work correctly.")
return False

if parsed_max_version is not None and parsed_v >= parsed_max_version:
msg = f"Mismatched version of {pkg}: expected <{max_v}, got {v}"
if raise_error:
raise RuntimeError(msg)
sys.stderr.write(f"{msg}. Some features may not work correctly.")
if not quiet:
sys.stderr.write(f"{msg}. Some features may not work correctly.")
return False

return True
Expand Down Expand Up @@ -170,6 +188,10 @@ class DependencyManager:
groq = Dependency("groq")
panel = Dependency("panel")

# Version requirements to properly support the new superfences introduced in
# pymdown#2470
new_superfences = Dependency("pymdownx", min_version="10.11.0")

@staticmethod
def has(pkg: str) -> bool:
"""Return True if any lib is installed."""
Expand Down
5 changes: 5 additions & 0 deletions marimo/_output/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

import markdown # type: ignore
import pymdownx.emoji # type: ignore
from pymdownx.superfences import ( # type: ignore
SuperFencesCodeExtension,
)

from marimo._output.hypertext import Html
from marimo._output.md_extensions.external_links import ExternalLinksExtension
Expand Down Expand Up @@ -108,6 +111,8 @@ def __init__(
ExternalLinksExtension(),
# Iconify
IconifyExtension(),
# Code fences for consistent syntax highlighting
dmadisetti marked this conversation as resolved.
Show resolved Hide resolved
SuperFencesCodeExtension(),
],
extension_configs=extension_configs, # type: ignore[arg-type]
).strip()
Expand Down
4 changes: 2 additions & 2 deletions marimo/_server/export/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,6 @@ def _format_value(v: Optional[str | list[str]]) -> str | list[str]:
except SyntaxError:
pass

# Definitely no "cell"; as such, treat as code, as everything in
# marimo is code.
if cell:
markdown = get_markdown_from_cell(cell, code)
# Unsanitized markdown is forced to code.
Expand All @@ -309,6 +307,8 @@ def _format_value(v: Optional[str | list[str]]) -> str | list[str]:
previous_was_markdown = True
document.append(markdown)
continue
# Definitely no "cell"; as such, treat as code, as everything in
# marimo is code.
else:
attributes["unparsable"] = "true"
# Add a blank line between markdown and code
Expand Down
Loading
Loading