Skip to content

Commit

Permalink
fix: html cleanup (#76)
Browse files Browse the repository at this point in the history
* feat: stop converting to markdown, clean html

- remove remark, don't convert html to markdown
- cleanup html
  - add class "pretty-print" to <pre><code>
  - add <a> for [text](link) markdown
  - add <code> for `text` code formatting

* fix: cleanup html, replace [text][uid] with xref tags
  • Loading branch information
eaball35 authored Oct 11, 2021
1 parent d4ae2c5 commit 2b49e89
Show file tree
Hide file tree
Showing 39 changed files with 209 additions and 145 deletions.
5 changes: 0 additions & 5 deletions third_party/docfx-doclet-143274/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,6 @@
<artifactId>jackson-dataformat-yaml</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.overzealous</groupId>
<artifactId>remark</artifactId>
<version>${remark.version}</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
Expand Down
Binary file not shown.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,9 @@ void addClassInfo(TypeElement classElement, MetadataFile classMetadataFile) {
}

/**
 * Builds the summary text for a deprecated element.
 *
 * @param depMsg deprecation message placed after the "(deprecated) " marker
 * @param summary regular summary of the element; skipped when null or empty
 * @return the deprecation notice, followed by the regular summary when one is present
 */
String getDeprecatedSummary(String depMsg, String summary){
String result = "(deprecated) " + depMsg;
// The deprecation notice is wrapped in its own <p> element.
String result = "<p>(deprecated) " + depMsg + "</p>";
if (summary != null && !summary.equals("")) {
result = result + " - " + summary;
// The regular summary goes on the line after the notice.
result = result + "\n" + summary;
}
return result;
}
Expand Down Expand Up @@ -495,7 +495,7 @@ void populateUidValues(List<MetadataFile> packageMetadataFiles, List<MetadataFil
LookupContext lookupContext = lookup.buildContext(classMetadataFile);

for (MetadataFileItem item : classMetadataFile.getItems()) {
item.setSummary(YamlUtil.convertHtmlToMarkdown(
item.setSummary(YamlUtil.cleanupHtml(
populateUidValues(item.getSummary(), lookupContext)
));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.microsoft.lookup.model.ExtendedMetadataFileItem;
import com.microsoft.model.*;
import com.microsoft.util.YamlUtil;
import com.sun.source.doctree.DocCommentTree;
import com.sun.source.doctree.DocTree;
import com.sun.source.doctree.LinkTree;
Expand Down Expand Up @@ -174,7 +175,7 @@ protected String determineComment(T element) {
* </ul>
*/
String replaceLinksAndCodes(List<? extends DocTree> items) {
return items.stream().map(
return YamlUtil.cleanupHtml(items.stream().map(
bodyItem -> {
switch (bodyItem.getKind()) {
case LINK:
Expand All @@ -188,7 +189,7 @@ String replaceLinksAndCodes(List<? extends DocTree> items) {
return String.valueOf(bodyItem);
}
}
).collect(Collectors.joining());
).collect(Collectors.joining()));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,9 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator.Feature;
import com.overzealous.remark.IgnoredHtmlElement;
import com.overzealous.remark.Options;
import com.overzealous.remark.Remark;
import org.apache.commons.lang3.StringUtils;

public class YamlUtil {

/**
* Same instance of {@link Remark} class reused for better performance according to authors recommendations.
* <p>
* It wrapped in ThreadLocal because of its non-thread safe nature
*/
private static ThreadLocal<Remark> remark = new ThreadLocal<>() {
@Override
protected Remark initialValue() {
Options options = Options.github();
options.fencedCodeBlocksWidth = 3;
options.ignoredHtmlElements.add(IgnoredHtmlElement.create("xref", "uid", "data-throw-if-not-resolved"));
return new Remark(options);
}
};

private static final ObjectMapper mapper = new ObjectMapper(new YAMLFactory()
.disable(Feature.WRITE_DOC_START_MARKER)
.disable(Feature.SPLIT_LINES)
Expand All @@ -42,12 +23,15 @@ public static String objectToYamlString(Object object) {
}
}

public static String convertHtmlToMarkdown(String text) {
/**
 * Rewrites the given text with a fixed chain of regex replacements. The replacements run in the
 * order written, each one operating on the output of the previous one.
 *
 * @param text text to clean up; may be null
 * @return the rewritten text, or {@code text} itself when it is null, empty or whitespace-only
 */
public static String cleanupHtml(String text) {
if (StringUtils.isBlank(text)) {
return text;
}
return remark.get().convertFragment(text)
.replaceAll("\r\n", "\n")
.replaceAll("\n\n```\n", "\n\n```java\n");
// <pre>...</pre> whose content has no '<' (so no nested tags) is unwrapped to its bare content.
return text.replaceAll("<pre>([^<]+)</pre>","$1")
// An attribute-less <pre><code> opening gets the "pretty-print" class.
.replaceAll("<pre><code>", "<pre><code class=\"pretty-print\">")
// `text` -> <code>text</code>
.replaceAll("`([^`]+)`", "<code>$1</code>")
// [text](link) -> <a href="link">text</a>
.replaceAll("\\[([^]]+)]\\(([^)]+)\\)", "<a href=\"$2\">$1</a>")
// [text][uid] -> <xref uid="uid" data-throw-if-not-resolved="false">text</xref>
.replaceAll("\\[([^]]+)]\\[([^]]+)\\]", "<xref uid=\"$2\" data-throw-if-not-resolved=\"false\">$1</xref>")
// Text enclosed by runs of two or more '=' on each side -> <h2>text</h2>
.replaceAll("==+([^=]+)==+", "<h2>$1</h2>");
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,51 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This package contains the sample set of classes for testing DocFx doclet.
* The interfaces provided are listed below, along with usage samples.
*
* <p>======================= SpeechClient =======================
*
* <p>Service Description: Service that implements Google Cloud Speech API.
*
* <p>Sample for SpeechClient:
*
* <pre>{@code
* try (SpeechClient speechClient = SpeechClient.create()) {
* RecognitionConfig config = RecognitionConfig.newBuilder().build();
* RecognitionAudio audio = RecognitionAudio.newBuilder().build();
* RecognizeResponse response = speechClient.recognize(config, audio);
* }
* }</pre>
*
* <p>======================= AdaptationClient =======================
*
* <p>Service Description: Service that implements Google Cloud Speech Adaptation API.
*
* <p>Sample for AdaptationClient:
*
* <pre>{@code
* try (AdaptationClient adaptationClient = AdaptationClient.create()) {
* LocationName parent = LocationName.of("[PROJECT]", "[LOCATION]");
* PhraseSet phraseSet = PhraseSet.newBuilder().build();
* String phraseSetId = "phraseSetId959902180";
* PhraseSet response = adaptationClient.createPhraseSet(parent, phraseSet, phraseSetId);
* }
* }</pre>
*/
@Generated("by gapic-generator-java")
package com.microsoft.samples;

import javax.annotation.Generated;
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.UUID;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.*;

public class YamlUtilTest {

Expand Down Expand Up @@ -45,29 +45,92 @@ public void objectToYamlString() {
+ " description: \"Some desc 5\"\n");
}

/**
 * Creates a {@link MetadataFileItem} fixture whose uid, id, href and single method parameter
 * all embed the given seed.
 *
 * @param seed number appended to every generated string
 * @return the populated item
 */
private MetadataFileItem buildMetadataFileItem(int seed) {
    MetadataFileItem item = new MetadataFileItem("Some uid " + seed);
    item.setId("Some id" + seed);
    item.setHref("Some href" + seed);

    MethodParameter parameter =
            new MethodParameter("Some id " + seed, "Some type " + seed, "Some desc " + seed);
    item.setParameters(Collections.singletonList(parameter));

    return item;
}


@Test
public void convertHtmlToMarkdown() throws IOException {
String text = FileUtils.readFileToString(new File("target/test-classes/html2md/initial.html"), UTF_8);
String expectedResult = FileUtils.readFileToString(new File("target/test-classes/html2md/converted.md"), UTF_8);
// Checks that a <pre> element holding plain text is unwrapped -- on its own, surrounded by
// other text, and when it occurs twice -- while a <pre><code class="pretty-print"> block is
// returned unchanged.
public void cleanupHtmlRemoveLonePreTagsTest() {
String expectedActual = "<pre>text</pre>"; // input handed to cleanupHtml
String expectedResult = "text";
String expectedWithCode = "<pre><code class=\"pretty-print\">text</code></pre>";
String random = UUID.randomUUID().toString(); // arbitrary surrounding text

assertEquals(expectedResult, YamlUtil.cleanupHtml(expectedActual));
assertEquals(random + expectedResult + random, YamlUtil.cleanupHtml(random + expectedActual + random));
assertEquals(expectedResult + random + expectedResult, YamlUtil.cleanupHtml(expectedActual + random + expectedActual));
assertEquals(expectedWithCode, YamlUtil.cleanupHtml(expectedWithCode));
}

String result = YamlUtil.convertHtmlToMarkdown(text);
// Checks that an adjacent <pre><code> pair gains class="pretty-print" -- on its own, surrounded
// by other text, and when it occurs twice -- and that the class is not added when other text
// separates <pre> from <code>.
@Test
public void cleanupHtmlIncludePrettyPrintTest() {
String expectedActual = "<pre><code>"; // input handed to cleanupHtml
String expectedResult = "<pre><code class=\"pretty-print\">";
String random = UUID.randomUUID().toString(); // arbitrary surrounding text

assertEquals("Wrong result", result, expectedResult);
assertEquals(expectedResult, YamlUtil.cleanupHtml(expectedActual));
assertEquals(random + expectedResult + random, YamlUtil.cleanupHtml(random + expectedActual + random));
assertEquals(expectedResult + random + expectedResult, YamlUtil.cleanupHtml(expectedActual + random + expectedActual));
assertNotEquals(expectedResult, YamlUtil.cleanupHtml("<pre>" + random + "<code>"));
assertFalse(YamlUtil.cleanupHtml("<pre>" + random + "<code>").contains("class=\"pretty-print\""));
}

@Test
public void convertHtmlToMarkdownForBlankParam() {
assertNull("Wrong result for null", YamlUtil.convertHtmlToMarkdown(null));
assertEquals("Wrong result for empty string", YamlUtil.convertHtmlToMarkdown(""), "");
// Checks that backtick-delimited text becomes a <code> element -- on its own, surrounded by
// other text, and when it occurs twice -- that an extra leading backtick is left in place, and
// that a single unpaired backtick produces no <code> tag.
public void cleanupHtmlAddCodeTagsTest() {
String expectedActual = "`text`"; // input handed to cleanupHtml
String expectedResult = "<code>text</code>";
String random = UUID.randomUUID().toString(); // arbitrary surrounding text

assertEquals(expectedResult, YamlUtil.cleanupHtml(expectedActual));
assertEquals(random + expectedResult + random, YamlUtil.cleanupHtml(random + expectedActual + random));
assertEquals(expectedResult + random + expectedResult, YamlUtil.cleanupHtml(expectedActual + random + expectedActual));
assertEquals("`" + expectedResult, YamlUtil.cleanupHtml("`" + expectedActual));
assertFalse(YamlUtil.cleanupHtml("`" + random).contains("<code>"));
}

private MetadataFileItem buildMetadataFileItem(int seed) {
MetadataFileItem metadataFileItem = new MetadataFileItem("Some uid " + seed);
metadataFileItem.setId("Some id" + seed);
metadataFileItem.setHref("Some href" + seed);
metadataFileItem.setParameters(Collections.singletonList(
new MethodParameter("Some id " + seed, "Some type " + seed, "Some desc " + seed)));
// Checks that a Markdown link [text](link) becomes an <a href> element -- on its own,
// surrounded by other text, and when it occurs twice -- and that malformed bracket/parenthesis
// sequences are not converted.
@Test
public void cleanupHtmlAddHrefTagsTest() {
String expectedActual = "[text](link)"; // input handed to cleanupHtml
String expectedResult = "<a href=\"link\">text</a>";
String random = UUID.randomUUID().toString(); // arbitrary surrounding text

return metadataFileItem;
assertEquals(expectedResult, YamlUtil.cleanupHtml(expectedActual));
assertEquals(random + expectedResult + random, YamlUtil.cleanupHtml(random + expectedActual + random));
assertEquals(expectedResult + random + expectedResult, YamlUtil.cleanupHtml(expectedActual + random + expectedActual));
assertEquals("[text]](link)", YamlUtil.cleanupHtml("[text]](link)"));
assertFalse(YamlUtil.cleanupHtml("[text(link)]").contains("href"));
}

/**
 * Verifies that text enclosed by runs of '=' characters is rewritten as an {@code <h2>} heading,
 * whether it stands alone, is surrounded by other text, or occurs twice in the same input.
 */
@Test
public void cleanupHtmlEqualTitlesTest() {
    String filler = UUID.randomUUID().toString();
    String title = "======================= SpeechClient =======================";
    String heading = "<h2> SpeechClient </h2>";

    String alone = YamlUtil.cleanupHtml(title);
    assertEquals(heading, alone);

    String wrapped = YamlUtil.cleanupHtml(filler + title + filler);
    assertEquals(filler + heading + filler, wrapped);

    String repeated = YamlUtil.cleanupHtml(title + filler + title);
    assertEquals(heading + filler + heading, repeated);

    // A single '=' on each side is expected to pass through untouched.
    String singleEquals = "= text =";
    assertEquals(singleEquals, YamlUtil.cleanupHtml(singleEquals));
}

/**
 * Verifies that a reference of the form {@code [text][uid]} is rewritten as an {@code <xref>} tag,
 * whether it stands alone, is surrounded by other text, or occurs twice in the same input, and
 * that malformed bracket sequences are not converted.
 */
@Test
public void cleanupHtmlReferenceTest() {
    String filler = UUID.randomUUID().toString();
    String reference = "[KeyRing][google.cloud.kms.v1.KeyRing]";
    String xref = "<xref uid=\"google.cloud.kms.v1.KeyRing\" data-throw-if-not-resolved=\"false\">KeyRing</xref>";

    String alone = YamlUtil.cleanupHtml(reference);
    assertEquals(xref, alone);

    String wrapped = YamlUtil.cleanupHtml(filler + reference + filler);
    assertEquals(filler + xref + filler, wrapped);

    String repeated = YamlUtil.cleanupHtml(reference + filler + reference);
    assertEquals(xref + filler + xref, repeated);

    // A doubled closing bracket must leave the input exactly as it was.
    String doubledBracket = "[uid]][text]";
    assertEquals(doubledBracket, YamlUtil.cleanupHtml(doubledBracket));

    // Nested brackets must not yield an xref tag.
    String nestedBrackets = YamlUtil.cleanupHtml("[text[uid]]");
    assertFalse(nestedBrackets.contains("xref"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ items:
fullName: "com.microsoft.samples.BasePartnerComponent<TContext>"
type: "Class"
package: "com.microsoft.samples"
summary: "Holds common partner component properties and behavior. All components should inherit from this class. The context object type."
summary: "Holds common partner component properties and behavior. All components should inherit from this class. The context\n object type."
syntax:
content: "public abstract class BasePartnerComponent<TContext>"
typeParameters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ items:
fullName: "com.microsoft.samples.IPartner"
type: "Interface"
package: "com.microsoft.samples"
summary: "The main entry point into using the partner SDK functionality. Represents a partner and encapsulates all the behavior attached to partners. Use this interface to get to the partner's customers, profiles, and customer orders, profiles and subscriptions and more."
summary: "The main entry point into using the partner SDK functionality. Represents a partner and encapsulates all the behavior\n attached to partners. Use this interface to get to the partner's customers, profiles, and customer orders, profiles\n and subscriptions and more."
syntax:
content: "public interface IPartner"
- uid: "com.microsoft.samples.IPartner.getCredentials()"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ items:
overridden: "com.microsoft.samples.subpackage.Person.getLastName()"
type: "Method"
package: "com.microsoft.samples"
summary: "Get capitalized last name. But it's not the end, because of multiline comment"
summary: "Get capitalized last name. But it's not the end,\n because of multiline comment"
syntax:
content: "public String getLastName()"
return:
Expand Down Expand Up @@ -254,7 +254,7 @@ items:
overload: "com.microsoft.samples.SuperHero.successfullyAttacked*"
type: "Method"
package: "com.microsoft.samples"
summary: "(deprecated) As of version 1.1, use . . . instead -\n\nThis is a simple description of the method. . . [Superman!][Superman]\n\n\n[Superman]: http://www.supermanisthegreatest.com"
summary: "<p>(deprecated) As of version 1.1, use . . . instead</p>\n<p>This is a simple description of the method. . .\n <a href=\"http://www.supermanisthegreatest.com\">Superman!</a>\n </p>"
syntax:
content: "public int successfullyAttacked(int incomingDamage, String damageType)"
parameters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ items:
fullName: "com.microsoft.samples.agreements.AgreementDetailsCollectionOperations"
type: "Class"
package: "com.microsoft.samples.agreements"
summary: "(deprecated) Use <xref uid=\"com.microsoft.samples.agreements.AgreementMetaData\" data-throw-if-not-resolved=\"false\">AgreementMetaData</xref> instead. - Agreement details collection operations implementation class."
summary: "<p>(deprecated) Use <xref uid=\"com.microsoft.samples.agreements.AgreementMetaData\" data-throw-if-not-resolved=\"false\">AgreementMetaData</xref> instead.</p>\nAgreement details collection operations implementation class."
syntax:
content: "public class AgreementDetailsCollectionOperations extends BasePartnerComponentString implements IAgreementDetailsCollection"
inheritance:
Expand Down Expand Up @@ -66,7 +66,7 @@ items:
overload: "com.microsoft.samples.agreements.AgreementDetailsCollectionOperations.get*"
type: "Method"
package: "com.microsoft.samples.agreements"
summary: "(deprecated) Some text - Retrieves the agreement details."
summary: "<p>(deprecated) Some text</p>\nRetrieves the agreement details."
syntax:
content: "public ResourceCollection<AgreementMetaData> get()"
return:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ items:
fullName: "com.microsoft.samples.agreements.AgreementMetaData"
type: "Class"
package: "com.microsoft.samples.agreements"
summary: "The AgreementMetaData provides metadata about the agreement type that partner can provide confirmation of customer acceptance."
summary: "The AgreementMetaData provides metadata about the agreement type\n that partner can provide confirmation of customer acceptance."
syntax:
content: "public class AgreementMetaData"
inheritance:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ items:
fullName: "com.microsoft.samples.agreements.IAgreementDetailsCollection"
type: "Interface"
package: "com.microsoft.samples.agreements"
summary: "(deprecated) This one is deprecated :( - Encapsulates the operations on the agreement metadata collection."
summary: "<p>(deprecated) This one is deprecated :(</p>\nEncapsulates the operations on the agreement metadata collection."
syntax:
content: "public interface IAgreementDetailsCollection"
status: "deprecated"
Expand Down
Loading

0 comments on commit 2b49e89

Please sign in to comment.