Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#31 merge global local schema #54

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions blurry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ async def write_html_file(
extra_context["sibling_pages"] = sibling_pages
folder_in_build = convert_content_path_to_directory_in_build(file_data.path)

schema_type = file_data.front_matter.get("@type")
schema_type = file_data.top_level_type
if not schema_type:
raise ValueError(
f"Required @type value missing in file or TOML front matter invalid: "
Expand Down Expand Up @@ -207,9 +207,10 @@ async def build(release=True):
file_data_by_directory[directory] = []

# Convert Markdown file to HTML
body, front_matter = convert_markdown_file_to_html(filepath)
body, front_matter, top_level_type = convert_markdown_file_to_html(filepath)
file_data = MarkdownFileData(
body=body,
top_level_type=top_level_type,
front_matter=front_matter,
path=relative_filepath,
)
Expand Down
111 changes: 94 additions & 17 deletions blurry/markdown/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from typing import TypeAlias
from typing import TypeGuard

import json
from pyld import jsonld

import mistune
from mistune import BlockState
from mistune.plugins.abbr import abbr
Expand Down Expand Up @@ -149,9 +152,96 @@ def is_blurry_renderer(
+ [plugin.load() for plugin in discovered_markdown_plugins],
)

# Default JSON-LD context: unprefixed terms resolve against the schema.org
# vocabulary.
SCHEMA_ORG = {"@vocab": "https://schema.org/"}
def jsonld_document_loader(secure=False, fragments=(), **kwargs):
    """
    Create a JSON-LD document loader that serves in-memory schema fragments.

    The returned loader performs no I/O: whatever URL it is asked for, it
    answers with a JSON array containing one element per fragment.

    :param secure: accepted for signature compatibility; not used.
    :param fragments: the fragments of schema loaded as dicts. A fragment
        without a truthy ``@context`` is served with ``SCHEMA_ORG`` as its
        context; the caller's dicts are never modified.
    :param **kwargs: accepted for signature compatibility; not used.

    :return: the RemoteDocument loader function.
    """

    def loader(ignored, options=None):
        """
        Retrieves JSON-LD from the dicts provided as fragments.

        :param ignored: this positional parameter is ignored, because the
            TOML fragments are side loaded.
        :param options: accepted because loaders are called with an options
            argument; not used.

        :return: the RemoteDocument.
        """
        serialized = [
            json.dumps(
                fragment
                if fragment.get("@context")
                # Build a new dict instead of writing into the caller's one.
                else {**fragment, "@context": SCHEMA_ORG}
            )
            for fragment in fragments
        ]

        return {
            "contentType": "application/ld+json",
            "contextUrl": None,
            "documentUrl": None,
            # A JSON array, as text, holding every fragment in order.
            "document": "[" + ",".join(serialized) + "]",
        }

    return loader

def add_inferred_schema(local_front_matter: dict, filepath: Path) -> dict:
    """
    Add values derived from the file's location to its front matter.

    Sets ``url`` from the file's path relative to the content directory and,
    when an ``image`` entry is present, replaces it with a URL-bearing copy
    and adds ``thumbnailUrl``.

    :param local_front_matter: front matter to extend; updated in place.
    :param filepath: path of the Markdown file the front matter belongs to.

    :return: the same ``local_front_matter`` dict, for convenience.
    """
    content_dir = get_content_directory()

    # Add inferred/computed/relative values
    # https://schema.org/url
    local_front_matter["url"] = content_path_to_url(
        filepath.relative_to(content_dir)
    )

    # https://schema.org/image
    # https://schema.org/thumbnailUrl
    if image := local_front_matter.get("image"):
        # Work on a copy so the value parsed from the file is left untouched.
        image_copy = deepcopy(image)
        relative_image_path = get_relative_image_path_from_image_property(image_copy)
        image_path = resolve_relative_path_in_markdown(relative_image_path, filepath)
        local_front_matter["image"] = update_image_with_url(image_copy, image_path)
        local_front_matter["thumbnailUrl"] = image_path_to_thumbnailUrl(image_path)

    return local_front_matter

def resolve_front_matter(
    state: "BlockState", filepath: Path
) -> "tuple[dict[str, Any], str | None]":
    """
    Combine the global schema data with a file's own front matter.

    The strategy is chosen by ``SETTINGS["FRONT_MATTER_RESOLUTION"]``:

    * ``"merge"``: both fragments are handed to pyld through the in-memory
      document loader and compacted against ``SCHEMA_ORG``.
    * anything else ("overwrite"): the file's front matter is layered over
      ``SETTINGS["SCHEMA_DATA"]`` with a plain dict update.

    :param state: parser state whose ``env["front_matter"]`` holds the
        front matter parsed from the file.
    :param filepath: path of the Markdown file being converted.

    :return: ``(front_matter, top_level_type)``. ``top_level_type`` is the
        ``@type`` of the page, or ``None`` when no ``@type`` is available.
    :raises Exception: whatever the merge step raises is re-raised after a
        short message is printed.
    """
    # Copies, so neither SETTINGS nor the parser state is modified.
    global_schema = dict(SETTINGS.get("SCHEMA_DATA", {}))
    local_schema = dict(state.env.get("front_matter", {}))

    if SETTINGS.get("FRONT_MATTER_RESOLUTION") != "merge":
        # Seed front_matter with schema_data from config file
        front_matter: dict[str, Any] = {**global_schema, **local_schema}
        front_matter = add_inferred_schema(front_matter, filepath)
        # The page type must be reported here too: the HTML writer rejects
        # files whose top-level type is missing.
        return front_matter, front_matter.get("@type")

    try:
        if not global_schema.get("@context"):
            global_schema["@context"] = SCHEMA_ORG

        # Read @type before compaction; the compacted result may not expose
        # it at the top level.
        top_level_type = local_schema.get("@type")
        if not local_schema.get("@context"):
            local_schema["@context"] = SCHEMA_ORG
        local_schema = add_inferred_schema(local_schema, filepath)

        # NOTE(review): set_document_loader replaces pyld's process-wide
        # loader — confirm nothing else in the build relies on the default.
        jsonld.set_document_loader(
            jsonld_document_loader(fragments=[global_schema, local_schema])
        )
        # The URL is a placeholder; the loader ignores it.
        # NOTE(review): confirm that compacting the two-element array yields
        # one merged node rather than an "@graph" of separate nodes.
        merged: dict[str, Any] = jsonld.compact("ignore", SCHEMA_ORG)
    except Exception as e:
        print("merging front matter failed:", e)
        raise

    return merged, top_level_type

def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any], str]:
if not markdown.renderer:
raise Exception("Blurry markdown renderer not set on Mistune Markdown instance")

Expand All @@ -167,26 +257,13 @@ def convert_markdown_file_to_html(filepath: Path) -> tuple[str, dict[str, Any]]:
html, state = markdown.parse(markdown_text, state=state)

if not is_str(html):
raise Exception(f"Expected html to be a string but got: {type(html)}")
raise Exception(f"Expected html to be a string but got: {top_level_type(html)}")

# Post-process HTML
html = remove_lazy_loading_from_first_image(html)

# Seed front_matter with schema_data from config file
front_matter: dict[str, Any] = dict(SETTINGS.get("SCHEMA_DATA", {}))
front_matter.update(state.env.get("front_matter", {}))

# Add inferred/computed/relative values
# https://schema.org/image
# https://schema.org/thumbnailUrl
front_matter.update({"url": content_path_to_url(filepath.relative_to(CONTENT_DIR))})
if image := front_matter.get("image"):
image_copy = deepcopy(image)
relative_image_path = get_relative_image_path_from_image_property(image_copy)
image_path = resolve_relative_path_in_markdown(relative_image_path, filepath)
front_matter["image"] = update_image_with_url(image_copy, image_path)
front_matter["thumbnailUrl"] = image_path_to_thumbnailUrl(image_path)
return html, front_matter
front_matter, top_level_type = resolve_front_matter(state, filepath)
return html, front_matter, top_level_type


def image_path_to_thumbnailUrl(image_path: Path):
Expand Down
2 changes: 2 additions & 0 deletions blurry/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Settings(TypedDict):
USE_HTTP: bool
RUNSERVER: bool
FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX: str
FRONT_MATTER_RESOLUTION: str


SETTINGS: Settings = {
Expand All @@ -46,6 +47,7 @@ class Settings(TypedDict):
"RUNSERVER": False,
"FRONTMATTER_NON_SCHEMA_VARIABLE_PREFIX": "~",
"TEMPLATE_SCHEMA_TYPES": {},
"FRONT_MATTER_RESOLUTION": "overwrite", # or "merge"
}


Expand Down
1 change: 1 addition & 0 deletions blurry/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# Data collected for one Markdown content file during a build.
@dataclass
class MarkdownFileData:
    # First value returned by convert_markdown_file_to_html for the file.
    body: str
    # Third value returned by convert_markdown_file_to_html; write_html_file
    # uses it as the schema type and raises ValueError when it is falsy.
    top_level_type: str
    # Second value returned by convert_markdown_file_to_html.
    front_matter: dict[str, Any]
    # build() passes the file's relative path here.
    path: Path

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "blurry-cli"
version = "0.6.2"
version = "0.6.2.1"
description = "A Mistune-based static site generator for Python"
authors = ["John Franey <franey@duck.com>"]
license = "MIT"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_sitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
directory_file_data = [
MarkdownFileData(
front_matter=dict(datePublished=date(2021, 1, 1), url="/blog/a-post-1/"),
top_level_type = "WebPage",
body="",
path=blog_path / "a-post-1",
),
MarkdownFileData(
front_matter=dict(datePublished=date(2021, 3, 1), url="/blog/b-post-3/"),
top_level_type = "WebPage",
body="",
path=blog_path / "b-post-3",
),
MarkdownFileData(
front_matter=dict(dateCreated=date(2021, 2, 1), url="/blog/c-post-2/"),
top_level_type = "WebPage",
body="",
path=blog_path / "c-post-2",
),
Expand All @@ -27,6 +30,7 @@
dateModified=date(2022, 1, 13),
url="/blog/c-post-4/",
),
top_level_type = "WebPage",
body="",
path=blog_path / "c-post-4",
),
Expand Down
4 changes: 4 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,25 @@ def test_sort_directory_file_data_by_date():
blog_path: [
MarkdownFileData(
front_matter=dict(datePublished=date(2021, 1, 1)),
top_level_type = "WebPage",
body="",
path=Path("a-post-1"),
),
MarkdownFileData(
front_matter=dict(datePublished=date(2021, 3, 1)),
top_level_type = "WebPage",
body="",
path=Path("b-post-3"),
),
MarkdownFileData(
front_matter=dict(dateCreated=date(2021, 2, 1)),
top_level_type = "WebPage",
body="",
path=Path("c-post-2"),
),
MarkdownFileData(
front_matter=dict(),
top_level_type = "WebPage",
body="",
path=Path("c-post-4"),
),
Expand Down
Loading