Skip to content

Commit

Permalink
Fix: abstract limit (#268)
Browse files Browse the repository at this point in the history
Patches sent by mail by @tiosgz and applied with `git am`. It's my very
first time using this kind of [git email
workflow](https://git-send-email.io/). Phew!

Command used:

```sh
git am --3way --ignore-space-change v2-0001-fix-retrieving-article-description.patch
git am --3way --ignore-space-change v2-0002-docs-configuration-fix-update-abstract_-chars_cou.patch
git am --3way --ignore-space-change v2-0003-tests-add-test-cases-for-abstract_delimiter.patch
```

Supersedes #202

cc @craigbox  @YDX-2147483647
  • Loading branch information
Guts authored Apr 23, 2024
2 parents a08052a + d81039d commit 0fb63da
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 41 deletions.
18 changes: 8 additions & 10 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,28 +240,26 @@ Output:

### `abstract_chars_count`: item description length

To fill each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
Used, in combination with `abstract_delimiter`, to determine each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):

- If this value is set to `-1`, then the articles' full HTML content will be filled into the description element.
- be careful: if set to `0` and there is no description, the feed's compliance is broken (an item must have a description)
- Otherwise, the plugin first tries to retrieve the value of the keyword `description` from the [page metadata].
- If the value is non-negative and no `description` meta is found, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted roughly into HTML.
- If that fails and `abstract_delimiter` is found in the page, the article content up to (but not including) the delimiter is used.
- If the above has failed, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted roughly into HTML.

Be careful: if set to `0` and there is no description, the feed's compliance is broken (an item must have a description).

`abstract_chars_count`: number of characters to use as item description.

Default: `150`

----

#### `abstract_delimiter`: abstract delimiter

Used to fill each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
### `abstract_delimiter`: abstract delimiter

- If this value is set to `-1`, then the full HTML content will be filled into the description element.
- Otherwise, the plugin first tries to retrieve the value of the key `description` from the page metadata.
- If the value is non-negative and no `description` meta is found, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted roughly into HTML (i.e. without extension, etc.).
Please see `abstract_chars_count` for how this setting is used. A value of `""` (the empty string) disables this step.

`abstract_delimiter`: string to mark .
`abstract_delimiter`: string to mark where the description ends.

Default: `<!-- more -->`

Expand Down
50 changes: 22 additions & 28 deletions mkdocs_rss_plugin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,8 @@ def get_description_or_abstract(
self, in_page: Page, chars_count: int = 160, abstract_delimiter: str = None
) -> str:
"""Returns description from page meta. If it doesn't exist, use the \
{chars_count} first characters from page content (in markdown).
page content up to {abstract_delimiter} or the {chars_count} first \
characters from page content (in markdown).
:param Page in_page: page to look at
:param int chars_count: if page.meta.description is not set, number of chars \
Expand All @@ -468,22 +469,16 @@ def get_description_or_abstract(

description = in_page.meta.get("description")

# Set chars_count to None if it is set to be unlimited, for slicing.
if chars_count < 0:
chars_count = None

# If the abstract chars is not unlimited and the description exists,
# return the description.
if description and chars_count is not None:
# If the full page is wanted (unlimited chars count)
if chars_count == -1 and (in_page.content or in_page.markdown):
if in_page.content:
return in_page.content
else:
return markdown.markdown(in_page.markdown, output_format="html5")
# If the description is explicitly given
elif description:
return description
# If no description and chars_count set to 0, return empty string
elif not description and chars_count == 0:
logger.warning(
f"No description set for page {in_page.file.src_uri} "
"and 'abstract_chars_count' set to 0. The feed won't be compliant, "
"because an item must have a description."
)
return ""
# If the abstract is cut by the delimiter
elif (
abstract_delimiter
and (
Expand All @@ -495,24 +490,23 @@ def get_description_or_abstract(
in_page.markdown[:excerpt_separator_position],
output_format="html5",
)
# If chars count is unlimited, use the html content
elif in_page.content and chars_count == -1:
if chars_count is None or len(in_page.content) < chars_count:
return in_page.content[:chars_count]
# Use markdown
elif in_page.markdown:
if chars_count is None or len(in_page.markdown) < chars_count:
return markdown.markdown(
in_page.markdown[:chars_count], output_format="html5"
)
# Use first chars_count from the markdown
elif chars_count > 0 and in_page.markdown:
if len(in_page.markdown) <= chars_count:
return markdown.markdown(in_page.markdown, output_format="html5")
else:
return markdown.markdown(
f"{in_page.markdown[: chars_count - 3]}...",
output_format="html5",
)
# Unlimited chars_count but no content is found, then return the description.
# No explicit description and no (or empty) abstract found
else:
return description if description else ""
logger.warning(
f"No description generated from metadata or content of the page {in_page.file.src_uri}, "
"therefore the feed won't be compliant, "
"because an item must have a description."
)
return ""

def get_image(self, in_page: Page, base_url: str) -> Optional[Tuple[str, str, int]]:
"""Get page's image from page meta or social cards and returns properties.
Expand Down
5 changes: 5 additions & 0 deletions tests/fixtures/docs/page_without_meta_early_delimiter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Page without meta with early delimiter

<!-- more -->

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
23 changes: 23 additions & 0 deletions tests/fixtures/mkdocs_item_delimiter_empty.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Project information
site_name: MkDocs RSS Plugin - TEST
site_description: Basic setup to test against MkDocs RSS plugin
site_author: Julien Moura (Guts)
site_url: https://guts.github.io/mkdocs-rss-plugin
copyright: "Guts - In Geo Veritas"

# Repository
repo_name: "guts/mkdocs-rss-plugin"
repo_url: "https://github.com/guts/mkdocs-rss-plugin"

use_directory_urls: true

plugins:
- rss:
abstract_delimiter: ""

theme:
name: readthedocs

# Extensions to enhance markdown
markdown_extensions:
- meta
52 changes: 49 additions & 3 deletions tests/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,55 @@ def test_simple_build_item_length_unlimited(self):
"Page without meta with short text",
"Blog sample",
):
self.assertGreaterEqual(
len(feed_item.description), 150, feed_item.title
)
self.assertGreater(len(feed_item.description), 150, feed_item.title)

def test_simple_build_item_delimiter(self):
# Build the "docs" test project with the minimal configuration and check
# that the page whose delimiter comes right after its title gets a short
# feed description.
# NOTE(review): presumably mkdocs_minimal.yml leaves `abstract_delimiter`
# at its default value - confirm against the fixture.
with tempfile.TemporaryDirectory() as tmpdirname:
cli_result = self.build_docs_setup(
testproject_path="docs",
mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_minimal.yml"),
output_path=tmpdirname,
strict=True,
)
# Log the full traceback so it is available when the assertions below fail.
if cli_result.exception is not None:
e = cli_result.exception
logger.debug(format_exception(type(e), e, e.__traceback__))

# The build must exit with code 0 and without any exception.
self.assertEqual(cli_result.exit_code, 0)
self.assertIsNone(cli_result.exception)

# created items
feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_RSS_FEED_CREATED)
# The parsed feed must not be flagged by feedparser (bozo == 0).
self.assertEqual(feed_parsed.bozo, 0)

for feed_item in feed_parsed.entries:
# Only the early-delimiter fixture page is checked: its description
# must be shorter than 50 characters.
if feed_item.title in ("Page without meta with early delimiter",):
self.assertLess(len(feed_item.description), 50, feed_item.title)

def test_simple_build_item_delimiter_empty(self):
# Build the "docs" test project with the mkdocs_item_delimiter_empty.yml
# configuration and check that the early-delimiter page gets a long feed
# description (more than 150 characters).
with tempfile.TemporaryDirectory() as tmpdirname:
cli_result = self.build_docs_setup(
testproject_path="docs",
mkdocs_yml_filepath=Path(
"tests/fixtures/mkdocs_item_delimiter_empty.yml"
),
output_path=tmpdirname,
strict=True,
)
# Log the full traceback so it is available when the assertions below fail.
if cli_result.exception is not None:
e = cli_result.exception
logger.debug(format_exception(type(e), e, e.__traceback__))

# The build must exit with code 0 and without any exception.
self.assertEqual(cli_result.exit_code, 0)
self.assertIsNone(cli_result.exception)

# created items
feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_RSS_FEED_CREATED)
# The parsed feed must not be flagged by feedparser (bozo == 0).
self.assertEqual(feed_parsed.bozo, 0)

for feed_item in feed_parsed.entries:
# Only the early-delimiter fixture page is checked: its description
# must be longer than 150 characters.
if feed_item.title in ("Page without meta with early delimiter",):
self.assertGreater(len(feed_item.description), 150, feed_item.title)

def test_simple_build_locale_with_territory(self):
with tempfile.TemporaryDirectory() as tmpdirname:
Expand Down

0 comments on commit 0fb63da

Please sign in to comment.