From 0c3210f8bf1b22e6d6411f3409e8f063e1623796 Mon Sep 17 00:00:00 2001
From: jo-elimu <1451036+jo-elimu@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:53:56 +0700
Subject: [PATCH 1/3] feat(ml): predict reading level during epub import
Resolves #1821
---
pom.xml | 6 +++
.../StoryBookCreateFromEPubController.java | 47 +++++++++++++++++++
.../web/content/storybook/step2_2_model.pmml | 43 +++++++++++++++++
3 files changed, 96 insertions(+)
create mode 100644 src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
diff --git a/pom.xml b/pom.xml
index e6f226567..834da26db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -287,6 +287,12 @@
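<version>model-${model.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.pmml4s</groupId>
+ <artifactId>pmml4s_3</artifactId>
+ <version>1.0.1</version>
+ </dependency>
+
<dependency>
<groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId>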
diff --git a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
index 8ae19115b..0d529b9a4 100644
--- a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
+++ b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
@@ -19,6 +19,7 @@
import ai.elimu.model.contributor.Contributor;
import ai.elimu.model.contributor.ImageContributionEvent;
import ai.elimu.model.contributor.StoryBookContributionEvent;
+import ai.elimu.model.v2.enums.ReadingLevel;
import ai.elimu.model.v2.enums.content.ImageFormat;
import ai.elimu.util.DiscordHelper;
import ai.elimu.util.ImageColorHelper;
@@ -39,6 +40,7 @@
import java.util.Arrays;
import java.util.Calendar;
import java.util.List;
+import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.FileUtils;
@@ -404,6 +406,11 @@ public String handleSubmit(
storyBookParagraphDao.create(storyBookParagraph);
}
}
+
+ ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook);
+ logger.info("predictedReadingLevel: " + predictedReadingLevel);
+ storyBook.setReadingLevel(predictedReadingLevel);
+ storyBookDao.update(storyBook);
if (!EnvironmentContextLoaderListener.PROPERTIES.isEmpty()) {
String contentUrl = "https://" + EnvironmentContextLoaderListener.PROPERTIES.getProperty("content.language").toLowerCase() + ".elimu.ai/content/storybook/edit/" + storyBook.getId();
@@ -518,4 +525,44 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS
);
}
}
+
+    private ReadingLevel predictReadingLevel(StoryBook storyBook) {
+        logger.info("predictReadingLevel");
+
+        // Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level)
+        // from a classpath stream: getResource(...).getFile() returns a URL-encoded path that is
+        // only a readable file while the resource sits unpacked on disk.
+        org.pmml4s.model.Model model = loadReadingLevelModel();
+        logger.info("model: " + model);
+
+        // Prepare values (features) to pass to the model
+        Map<String, Double> values = Map.of(
+                "id", Double.valueOf(storyBook.getId())
+        );
+        logger.info("values: " + values);
+
+        // Make prediction; features are passed in the order of the model's declared input names
+        Object[] inputs = Arrays.stream(model.inputNames()).map(values::get).toArray();
+        Object[] results = model.predict(inputs);
+        logger.info("inputs: " + Arrays.toString(inputs) + ", results: " + Arrays.toString(results));
+        if (results == null || results.length == 0 || !(results[0] instanceof Number)) {
+            throw new IllegalStateException("Unexpected prediction: " + Arrays.toString(results));
+        }
+
+        // Convert from number to ReadingLevel enum (e.g. "LEVEL2"); intValue() drops any fraction
+        return ReadingLevel.valueOf("LEVEL" + ((Number) results[0]).intValue());
+    }
+
+    /** Parses the bundled PMML model, failing with a descriptive error if it is missing or unreadable. */
+    private org.pmml4s.model.Model loadReadingLevelModel() {
+        try (java.io.InputStream pmmlStream = getClass().getResourceAsStream("step2_2_model.pmml")) {
+            // getResourceAsStream signals a missing resource with null rather than an exception
+            if (pmmlStream == null) {
+                throw new IllegalStateException("Classpath resource not found: step2_2_model.pmml");
+            }
+            return org.pmml4s.model.Model.fromInputStream(pmmlStream);
+        } catch (java.io.IOException e) {
+            throw new java.io.UncheckedIOException("Failed to read step2_2_model.pmml", e);
+        }
+    }
}
diff --git a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
new file mode 100644
index 000000000..8aa0dec4b
--- /dev/null
+++ b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
@@ -0,0 +1,43 @@
+
+
+
+
+ 2024-08-16T03:26:47Z
+
+
+ PMMLPipeline(steps=[('regressor', DecisionTreeRegressor(random_state=1))])
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From cbe82d5b49a591c2a29d413e3b72b6106b5edf4f Mon Sep 17 00:00:00 2001
From: jo-elimu <1451036+jo-elimu@users.noreply.github.com>
Date: Sat, 17 Aug 2024 14:45:19 +0700
Subject: [PATCH 2/3] feat(ml): add chapter_count
ref #1821
---
.../StoryBookCreateFromEPubController.java | 7 ++++---
.../web/content/storybook/step2_2_model.pmml | 21 +++++++++----------
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
index 0d529b9a4..416985bbc 100644
--- a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
+++ b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
@@ -407,7 +407,7 @@ public String handleSubmit(
}
}
- ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook);
+ ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook, storyBookChapters);
logger.info("predictedReadingLevel: " + predictedReadingLevel);
storyBook.setReadingLevel(predictedReadingLevel);
storyBookDao.update(storyBook);
@@ -526,7 +526,7 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS
}
}
- private ReadingLevel predictReadingLevel(StoryBook storyBook) {
+ private ReadingLevel predictReadingLevel(StoryBook storyBook, List<StoryBookChapter> storyBookChapters) {
logger.info("predictReadingLevel");
// Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level)
@@ -537,7 +537,8 @@ private ReadingLevel predictReadingLevel(StoryBook storyBook) {
// Prepare values (features) to pass to the model
Map<String, Double> values = Map.of(
- "id", Double.valueOf(storyBook.getId())
+ "id", Double.valueOf(storyBook.getId()),
+ "chapter_count", Double.valueOf(storyBookChapters.size())
);
logger.info("values: " + values);
diff --git a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
index 8aa0dec4b..13342d16b 100644
--- a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
+++ b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
@@ -2,41 +2,40 @@
- 2024-08-16T03:26:47Z
+ 2024-08-17T07:35:44Z
-
- PMMLPipeline(steps=[('regressor', DecisionTreeRegressor(random_state=1))])
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
From 8be95c85204b314f52a306cea19610f56e7602e1 Mon Sep 17 00:00:00 2001
From: jo-elimu <1451036+jo-elimu@users.noreply.github.com>
Date: Sat, 17 Aug 2024 19:07:32 +0700
Subject: [PATCH 3/3] feat(ml): add word_count
refs #1821
---
.../StoryBookCreateFromEPubController.java | 20 ++++++--
.../web/content/storybook/step2_2_model.pmml | 51 +++++++++++++++----
2 files changed, 56 insertions(+), 15 deletions(-)
diff --git a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
index 416985bbc..5f7856387 100644
--- a/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
+++ b/src/main/java/ai/elimu/web/content/storybook/StoryBookCreateFromEPubController.java
@@ -407,7 +407,18 @@ public String handleSubmit(
}
}
- ReadingLevel predictedReadingLevel = predictReadingLevel(storyBook, storyBookChapters);
+ List<StoryBookChapter> chapters = storyBookChapterDao.readAll(storyBook);
+ int chapterCount = chapters.size();
+ int paragraphCount = 0;
+ int wordCount = 0;
+ for (StoryBookChapter chapter : chapters) {
+ List<StoryBookParagraph> paragraphs = storyBookParagraphDao.readAll(chapter);
+ paragraphCount += paragraphs.size();
+ for (StoryBookParagraph paragraph : paragraphs) {
+ wordCount += paragraph.getOriginalText().split(" ").length;
+ }
+ }
+ ReadingLevel predictedReadingLevel = predictReadingLevel(chapterCount, paragraphCount, wordCount);
logger.info("predictedReadingLevel: " + predictedReadingLevel);
storyBook.setReadingLevel(predictedReadingLevel);
storyBookDao.update(storyBook);
@@ -526,7 +537,7 @@ private void storeImageContributionEvent(Image image, HttpSession session, HttpS
}
}
- private ReadingLevel predictReadingLevel(StoryBook storyBook, List<StoryBookChapter> storyBookChapters) {
+ private ReadingLevel predictReadingLevel(int chapterCount, int paragraphCount, int wordCount) {
logger.info("predictReadingLevel");
// Load the machine learning model (https://github.com/elimu-ai/ml-storybook-reading-level)
@@ -537,8 +548,9 @@ private ReadingLevel predictReadingLevel(StoryBook storyBook, List<StoryBookChap
// Prepare values (features) to pass to the model
Map<String, Double> values = Map.of(
- "id", Double.valueOf(storyBook.getId()),
- "chapter_count", Double.valueOf(storyBookChapters.size())
+ "chapter_count", Double.valueOf(chapterCount),
+ "paragraph_count", Double.valueOf(paragraphCount),
+ "word_count", Double.valueOf(wordCount)
);
logger.info("values: " + values);
diff --git a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
index 13342d16b..882a938fd 100644
--- a/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
+++ b/src/main/resources/ai/elimu/web/content/storybook/step2_2_model.pmml
@@ -2,40 +2,69 @@
- 2024-08-17T07:35:44Z
+ 2024-08-17T11:40:01Z
-
+
+
-
+
+
-
-
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+