From 0c3210f8bf1b22e6d6411f3409e8f063e1623796 Mon Sep 17 00:00:00 2001 From: jo-elimu <1451036+jo-elimu@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:53:56 +0700 Subject: [PATCH 1/3] feat(ml): predict reading level during epub import Resolves #1821 --- pom.xml | 6 +++ .../StoryBookCreateFromEPubController.java | 47 +++++++++++++++++++ .../web/content/storybook/step2_2_model.pmml | 43 +++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml diff --git a/pom.xml b/pom.xml index e6f226567..834da26db 100644 --- a/pom.xml +++ b/pom.xml @@ -287,6 +287,12 @@ model-${model.version} + + org.pmml4s + pmml4s_3 + 1.0.1 + + commons-fileupload commons-fileupload diff --git a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java index 8ae19115b..0d529b9a4 100644 --- a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java +++ b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java @@ -19,6 +19,7 @@ import ai.elimu.model.contributor.Contributor; import ai.elimu.model.contributor.ImageContributionEvent; import ai.elimu.model.contributor.StoryBookContributionEvent; +import ai.elimu.model.v2.enums.ReadingLevel; import ai.elimu.model.v2.enums.content.ImageFormat; import ai.elimu.util.DiscordHelper; import ai.elimu.util.ImageColorHelper; @@ -39,6 +40,7 @@ import java.util.Arrays; import java.util.Calendar; import java.util.List; +import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.apache.commons.io.FileUtils; @@ -404,6 +406,11 @@ public String handleSubmit( storyBookParagraphDao.create(storyBookParagraph); } } + + ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook); + logger.info("predictedReadingLevel: " + predictedReadingLevel); + storyBook.setReadingLevel(predictedReadingLevel); + storyBookDao.update(storyBook); if (!EnvironmentContextLoaderListener.PROPERTIES.isEmpty()) { String contentUrl = "https://" + EnvironmentContextLoaderListener.PROPERTIES.getProperty("content.language").toLowerCase() + ".elimu.ai/content/storybook/edit/" + storyBook.getId(); @@ -518,4 +525,44 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS ); } } + + private ReadingLevel predictReadingLevel(StoryBook storyBook) { + logger.info("predictReadingLevel"); + + // Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level) + String modelFilePath = getClass().getResource("step2_2_model.pmml").getFile(); + logger.info("modelFilePath: " + modelFilePath); + org.pmml4s.model.Model model = org.pmml4s.model.Model.fromFile(modelFilePath); + logger.info("model: " + model); + + // Prepare values (features) to pass to the model + Map values = Map.of( + "id", Double.valueOf(storyBook.getId()) + ); + logger.info("values: " + values); + + // Make prediction + logger.info("Arrays.toString(model.inputNames()): " + Arrays.toString(model.inputNames())); + Object[] valuesMap = Arrays.stream(model.inputNames()) + .map(values::get) + .toArray(); + logger.info("valuesMap: " + valuesMap); + Object[] results = model.predict(valuesMap); + logger.info("results: " + results); + logger.info("Arrays.toString(results): " + Arrays.toString(results)); + Object result = results[0]; + logger.info("result: " + result); + logger.info("result.getClass().getSimpleName(): " + result.getClass().getSimpleName()); + Double resultAsDouble = (Double) result; + logger.info("resultAsDouble: " + resultAsDouble); + Integer resultAsInteger = resultAsDouble.intValue(); + logger.info("resultAsInteger: " + resultAsInteger); + + // Convert from number to ReadingLevel enum (e.g. "LEVEL2") + String readingLevelAsString = "LEVEL" + resultAsInteger; + logger.info("readingLevelAsString: " + readingLevelAsString); + ReadingLevel readingLevel = ReadingLevel.valueOf(readingLevelAsString); + logger.info("readingLevel: " + readingLevel); + return readingLevel; + } } diff --git a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml new file mode 100644 index 000000000..8aa0dec4b --- /dev/null +++ b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml @@ -0,0 +1,43 @@ + + +
+ + 2024-08-16T03:26:47Z +
+ + PMMLPipeline(steps=[('regressor', DecisionTreeRegressor(random_state=1))]) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
From cbe82d5b49a591c2a29d413e3b72b6106b5edf4f Mon Sep 17 00:00:00 2001 From: jo-elimu <1451036+jo-elimu@users.noreply.github.com> Date: Sat, 17 Aug 2024 14:45:19 +0700 Subject: [PATCH 2/3] feat(ml): add chapter_count ref #1821 --- .../StoryBookCreateFromEPubController.java | 7 ++++--- .../web/content/storybook/step2_2_model.pmml | 21 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java index 0d529b9a4..416985bbc 100644 --- a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java +++ b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java @@ -407,7 +407,7 @@ public String handleSubmit( } } - ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook); + ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook, storyBookChapters); logger.info("predictedReadingLevel: " + predictedReadingLevel); storyBook.setReadingLevel(predictedReadingLevel); storyBookDao.update(storyBook); @@ -526,7 +526,7 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS } } - private ReadingLevel predictReadingLevel(StoryBook storyBook) { + private ReadingLevel predictReadingLevel(StoryBook storyBook, List storyBookChapters) { logger.info("predictReadingLevel"); // Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level) @@ -537,7 +537,8 @@ private ReadingLevel predictReadingLevel(StoryBook storyBook) { // Prepare values (features) to pass to the model Map values = Map.of( - "id", Double.valueOf(storyBook.getId()) + "id", Double.valueOf(storyBook.getId()), + "chapter_count", Double.valueOf(storyBookChapters.size()) ); logger.info("values: " + values); diff --git a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml index 8aa0dec4b..13342d16b 100644 --- a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml +++ b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml @@ -2,41 +2,40 @@
- 2024-08-16T03:26:47Z + 2024-08-17T07:35:44Z
- - PMMLPipeline(steps=[('regressor', DecisionTreeRegressor(random_state=1))]) - + + + + + + + + - - - - - - - + From 8be95c85204b314f52a306cea19610f56e7602e1 Mon Sep 17 00:00:00 2001 From: jo-elimu <1451036+jo-elimu@users.noreply.github.com> Date: Sat, 17 Aug 2024 19:07:32 +0700 Subject: [PATCH 3/3] feat(ml): add word_count refs #1821 --- .../StoryBookCreateFromEPubController.java | 20 ++++++-- .../web/content/storybook/step2_2_model.pmml | 51 +++++++++++++++---- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java index 416985bbc..5f7856387 100644 --- a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java +++ b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java @@ -407,7 +407,18 @@ public String handleSubmit( } } - ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook, storyBookChapters); + List chapters = storyBookChapterDao.readAll(storyBook); + int chapterCount = chapters.size(); + int paragraphCount = 0; + int wordCount = 0; + for (StoryBookChapter chapter : chapters) { + List paragraphs = storyBookParagraphDao.readAll(chapter); + paragraphCount += paragraphs.size(); + for (StoryBookParagraph paragraph : paragraphs) { + wordCount += paragraph.getOriginalText().split(" ").length; + } + } + ReadingLevel predictedReadingLevel = predictReadingLevel(chapterCount, paragraphCount, wordCount); logger.info("predictedReadingLevel: " + predictedReadingLevel); storyBook.setReadingLevel(predictedReadingLevel); storyBookDao.update(storyBook); @@ -526,7 +537,7 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS } } - private ReadingLevel predictReadingLevel(StoryBook storyBook, List storyBookChapters) { + private ReadingLevel predictReadingLevel(int chapterCount, int paragraphCount, int wordCount) { logger.info("predictReadingLevel"); // Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level) @@ -537,8 +548,9 @@ private ReadingLevel predictReadingLevel(StoryBook storyBook, List values = Map.of( - "id", Double.valueOf(storyBook.getId()), - "chapter_count", Double.valueOf(storyBookChapters.size()) + "chapter_count", Double.valueOf(chapterCount), + "paragraph_count", Double.valueOf(paragraphCount), + "word_count", Double.valueOf(wordCount) ); logger.info("values: " + values); diff --git a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml index 13342d16b..882a938fd 100644 --- a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml +++ b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml @@ -2,40 +2,69 @@
- 2024-08-17T07:35:44Z + 2024-08-17T11:40:01Z
- + + - + + - - + + + + + - - - + + + + + + + + + + + + + + + - + - - + + + + + + + + + + + + + +