From 645be3e57a40b326a6d7b350eac23b076e1d417a Mon Sep 17 00:00:00 2001 From: Russ Poetker Date: Wed, 18 Dec 2024 14:29:23 -0500 Subject: [PATCH 1/2] Cleanse nihms email content --- .../pass/deposit/service/NihmsReceiveMailService.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java b/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java index 6ee8ce4e..0aeef643 100644 --- a/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java +++ b/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java @@ -31,6 +31,7 @@ import jakarta.mail.internet.AddressException; import jakarta.mail.internet.InternetAddress; import jakarta.mail.internet.MimeMessage; +import org.apache.commons.lang3.StringUtils; import org.eclipse.pass.deposit.provider.nihms.NihmsAssembler; import org.eclipse.pass.support.client.PassClient; import org.eclipse.pass.support.client.PassClientSelector; @@ -99,14 +100,15 @@ public void handleReceivedMail(MimeMessage receivedMessage) { } LOG.warn("Email is from Nihms"); String content = getHtmlText(receivedMessage); - LOG.warn("Nihms Email content:" + content); + String cleansedContent = StringUtils.normalizeSpace(content); + LOG.warn("Nihms Email content:" + cleansedContent); if (Objects.isNull(content)) { LOG.error("No HTML content found in nihms email: " + receivedMessage.getSubject()); return; } - Elements messageElements = getMessageElements(content); + Elements messageElements = getMessageElements(cleansedContent); if (messageElements.isEmpty()) { - LOG.error("No messages found in nihms email: " + content); + LOG.error("No messages found in nihms email: " + cleansedContent); return; } processMessages(messageElements); From 2fc41bbb941decbb4af6b612ee88c9fff778c98d Mon Sep 17 00:00:00 2001 From: Russ Poetker Date: Wed, 18 Dec 2024 14:51:47 -0500 Subject: [PATCH 2/2] Move cleanse to MailUtil --- .../org/eclipse/pass/deposit/service/MailUtil.java | 11 ++++++++--- .../pass/deposit/service/NihmsReceiveMailService.java | 8 +++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/MailUtil.java b/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/MailUtil.java index 30df01fc..dc5641f3 100644 --- a/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/MailUtil.java +++ b/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/MailUtil.java @@ -21,6 +21,7 @@ import jakarta.mail.MessagingException; import jakarta.mail.Multipart; import jakarta.mail.Part; +import org.apache.commons.lang3.StringUtils; /** * @author Russ Poetker (rpoetke1@jh.edu) @@ -31,7 +32,7 @@ private MailUtil() {} static String getHtmlText(Part part) throws MessagingException, IOException { if (part.isMimeType("text/html")) { - return part.getContent().toString(); + return cleanseContent(part.getContent().toString()); } if (part.isMimeType("multipart/alternative")) { @@ -40,7 +41,7 @@ static String getHtmlText(Part part) throws MessagingException, IOException { for (int i = 0; i < count; i++) { Part bodyPart = multipart.getBodyPart(i); if (bodyPart.isMimeType("text/html")) { - return bodyPart.getContent().toString(); + return cleanseContent(bodyPart.getContent().toString()); } else if (bodyPart.isMimeType("multipart/*")) { return getHtmlText(bodyPart); } @@ -52,11 +53,15 @@ static String getHtmlText(Part part) throws MessagingException, IOException { Part bodyPart = multipart.getBodyPart(i); String content = getHtmlText(bodyPart); if (Objects.nonNull(content)) { - return content; + return cleanseContent(content); } } } return null; } + + private static String cleanseContent(String content) { + return StringUtils.normalizeSpace(content); + } } diff --git a/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java b/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java index 0aeef643..32e9ac65 100644 --- a/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java +++ b/pass-deposit-services/deposit-core/src/main/java/org/eclipse/pass/deposit/service/NihmsReceiveMailService.java @@ -31,7 +31,6 @@ import jakarta.mail.internet.AddressException; import jakarta.mail.internet.InternetAddress; import jakarta.mail.internet.MimeMessage; -import org.apache.commons.lang3.StringUtils; import org.eclipse.pass.deposit.provider.nihms.NihmsAssembler; import org.eclipse.pass.support.client.PassClient; import org.eclipse.pass.support.client.PassClientSelector; @@ -100,15 +99,14 @@ public void handleReceivedMail(MimeMessage receivedMessage) { } LOG.warn("Email is from Nihms"); String content = getHtmlText(receivedMessage); - String cleansedContent = StringUtils.normalizeSpace(content); - LOG.warn("Nihms Email content:" + cleansedContent); + LOG.warn("Nihms Email content: {}", content); if (Objects.isNull(content)) { LOG.error("No HTML content found in nihms email: " + receivedMessage.getSubject()); return; } - Elements messageElements = getMessageElements(cleansedContent); + Elements messageElements = getMessageElements(content); if (messageElements.isEmpty()) { - LOG.error("No messages found in nihms email: " + cleansedContent); + LOG.error("No messages found in nihms email: {}", content); return; } processMessages(messageElements);