Skip to content

Commit

Permalink
Merge branch 'google:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
shitamo authored Jan 5, 2025
2 parents a833ae9 + 86e5da1 commit 61db065
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 116 deletions.
1 change: 1 addition & 0 deletions src/converter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ mozc_cc_test(
"//request:conversion_request",
"//request:request_test_util",
"//rewriter",
"//rewriter:date_rewriter",
"//rewriter:rewriter_interface",
"//testing:gunit_main",
"//testing:mozctest",
Expand Down
28 changes: 28 additions & 0 deletions src/converter/converter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
#include "protocol/user_dictionary_storage.pb.h"
#include "request/conversion_request.h"
#include "request/request_test_util.h"
#include "rewriter/date_rewriter.h"
#include "rewriter/rewriter.h"
#include "rewriter/rewriter_interface.h"
#include "testing/gmock.h"
Expand Down Expand Up @@ -2311,4 +2312,31 @@ TEST_F(ConverterTest, IntegrationWithCalculatorRewriter) {
}
}

// Checks that a Converter built with a DateRewriter offers the expected year
// candidates for two keys: "へいせい30ねん" must yield "2018年" and "794ねん"
// must yield "延暦13年".
TEST_F(ConverterTest, IntegrationWithDateRewriter) {
  MockDictionary dictionary;
  // Since DateRewriter is not used in some build targets, the test needs to
  // explicitly add it to the converter.
  std::unique_ptr<Converter> converter = CreateConverter(
      std::make_unique<DateRewriter>(&dictionary), STUB_PREDICTOR);

  {
    Segments segments;
    const ConversionRequest convreq =
        ConversionRequestBuilder().SetKey("へいせい30ねん").Build();
    ASSERT_TRUE(converter->StartConversion(convreq, &segments));
    // Fatal on purpose: segment 0 is read on the next line, so the test must
    // stop here instead of continuing when the segment count is unexpected.
    ASSERT_EQ(segments.conversion_segments_size(), 1);
    EXPECT_TRUE(FindCandidateByValue("2018年", segments.conversion_segment(0)));
  }

  {
    Segments segments;
    const ConversionRequest convreq =
        ConversionRequestBuilder().SetKey("794ねん").Build();
    ASSERT_TRUE(converter->StartConversion(convreq, &segments));
    // Fatal on purpose: segment 0 is read on the next line, so the test must
    // stop here instead of continuing when the segment count is unexpected.
    ASSERT_EQ(segments.conversion_segments_size(), 1);
    EXPECT_TRUE(
        FindCandidateByValue("延暦13年", segments.conversion_segment(0)));
  }
}

} // namespace mozc
2 changes: 0 additions & 2 deletions src/rewriter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,6 @@ mozc_cc_test(
"//dictionary:dictionary_interface",
"//dictionary:dictionary_mock",
"//dictionary:dictionary_token",
"//engine",
"//engine:mock_data_engine_factory",
"//protocol:commands_cc_proto",
"//protocol:config_cc_proto",
"//request:conversion_request",
Expand Down
90 changes: 46 additions & 44 deletions src/rewriter/date_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <limits>
#include <memory>
#include <optional>
#include <string>
Expand Down Expand Up @@ -1121,30 +1122,23 @@ bool DateRewriter::RewriteAd(Segments::range segments_range,
// `RewriteEra` which supports multiple segments without merging, this function
// needs to produce a candidate for 2 segments (the era and the digits), which
// isn't easy.
bool DateRewriter::ResizeSegmentsForRewriteAd(
const ConversionRequest &request, Segments::const_range segments_range,
Segments *segments) const {
if (segments_range.empty()) {
LOG(WARNING) << "No candidates are found";
return false;
}
if (segments->resized()) {
// If the given segments are resized by user, don't modify anymore.
return false;
}

std::optional<RewriterInterface::ResizeSegmentsRequest>
DateRewriter::CheckResizeSegmentsForAd(const ConversionRequest &request,
const Segments &segments,
const size_t segment_index) const {
// Find the first segment that ends with `kNenKey`.
constexpr size_t kMaxSegments = 3; // Only up to 3 segments.
bool has_suffix = false;
bool should_resize_last_segment = false;
std::vector<absl::string_view> keys;
for (const Segment &segment : segments_range) {
for (const Segment &segment :
segments.conversion_segments().drop(segment_index)) {
const absl::string_view key{segment.key()};
if (auto pos = key.find(kNenKey); pos != absl::string_view::npos) {
if (pos == 0 && keys.size() == 1) {
// If the second key starts with the `kNenKey`, `RewriteAd()` can handle
// it without resizing.
return false;
return std::nullopt;
}
pos += kNenKey.size();
if (pos == key.size()) {
Expand All @@ -1158,41 +1152,33 @@ bool DateRewriter::ResizeSegmentsForRewriteAd(
break;
}
if (keys.size() >= kMaxSegments - 1) {
return false;
return std::nullopt;
}
keys.push_back(key);
}
if (!has_suffix || (keys.size() <= 1 && !should_resize_last_segment)) {
return false;
return std::nullopt;
}
const std::string key = absl::StrJoin(keys, "");
DCHECK(!key.empty());
const size_t key_len = Util::CharsLen(key);
if (key_len > std::numeric_limits<uint8_t>::max()) {
return std::nullopt;
}
const uint8_t segment_size = static_cast<uint8_t>(key_len);

// Try to convert era to AD.
const std::vector<std::pair<std::string, std::string>>
results_anddescriptions = EraToAd(key);
if (results_anddescriptions.empty()) {
return false;
return std::nullopt;
}

return ResizeSegments(request, segments_range.begin(), key, segments);
}

// Extend or shrink the `*segments_begin` to the `key`.
bool DateRewriter::ResizeSegments(const ConversionRequest &request,
Segments::const_iterator segments_begin,
const absl::string_view key,
Segments *segments) const {
const absl::string_view key0 = segments_begin->key();
DCHECK_NE(key.size(), key0.size());
const int diff = Util::CharsLen(key) - Util::CharsLen(key0);
const size_t segment_index = segments_begin - segments->all().begin();
if (!parent_converter_->ResizeSegment(segments, request, segment_index,
diff)) {
LOG(ERROR) << "Failed to merge conversion segments";
return false;
}
return true;
ResizeSegmentsRequest resize_request = {
.segment_index = segment_index,
.segment_sizes = { segment_size, 0, 0, 0, 0, 0, 0, 0 },
};
return resize_request;
}

namespace {
Expand Down Expand Up @@ -1503,6 +1489,31 @@ std::string GetExtraFormat(const dictionary::DictionaryInterface *dictionary) {
}
} // namespace

std::optional<RewriterInterface::ResizeSegmentsRequest>
DateRewriter::CheckResizeSegmentsRequest(const ConversionRequest &request,
const Segments &segments) const {
if (!request.config().use_date_conversion()) {
MOZC_VLOG(2) << "no use_date_conversion";
return std::nullopt;
}

if (segments.resized()) {
// If the given segments are resized by user, don't modify anymore.
return std::nullopt;
}

for (size_t segment_index = 0;
segment_index < segments.conversion_segments_size(); ++segment_index) {
std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
CheckResizeSegmentsForAd(request, segments, segment_index);
if (resize_request.has_value()) {
return resize_request;
}
}

return std::nullopt;
}

bool DateRewriter::Rewrite(const ConversionRequest &request,
Segments *segments) const {
if (!request.config().use_date_conversion()) {
Expand All @@ -1523,15 +1534,6 @@ bool DateRewriter::Rewrite(const ConversionRequest &request,
return false;
}

if (ResizeSegmentsForRewriteAd(request, rest_segments, segments)) {
// Return without further rewrites when segments were resized. Views for
// `segments` may be invalidated.
// `ResizeSegment()` calls `Rewriter::Rewrite()`, which recursively calls
// `DateRewriter::Rewrite()` with merged segments. Other rewrites were
// done by the recursive call.
return true;
}

if (RewriteAd(rest_segments, num_done) ||
RewriteDate(seg, extra_format, num_done) ||
RewriteEra(rest_segments, num_done)) {
Expand Down
23 changes: 12 additions & 11 deletions src/rewriter/date_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@

#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "absl/strings/string_view.h"
#include "composer/composer.h"
#include "converter/converter_interface.h"
#include "converter/segments.h"
#include "dictionary/dictionary_interface.h"
#include "request/conversion_request.h"
Expand All @@ -62,12 +62,15 @@ struct DateCandidate {
class DateRewriter : public RewriterInterface {
public:
DateRewriter() = default;
explicit DateRewriter(const ConverterInterface *parent_converter,
const dictionary::DictionaryInterface *dictionary)
: parent_converter_(parent_converter), dictionary_(dictionary) {}
explicit DateRewriter(const dictionary::DictionaryInterface *dictionary)
: dictionary_(dictionary) {}

int capability(const ConversionRequest &request) const override;

std::optional<ResizeSegmentsRequest> CheckResizeSegmentsRequest(
const ConversionRequest &request,
const Segments &segments) const override;

bool Rewrite(const ConversionRequest &request,
Segments *segments) const override;

Expand Down Expand Up @@ -157,12 +160,11 @@ class DateRewriter : public RewriterInterface {
size_t &num_done_out);
static bool RewriteEra(Segments::range segments_range, size_t &num_done_out);
static bool RewriteAd(Segments::range segments_range, size_t &num_done_out);
bool ResizeSegmentsForRewriteAd(const ConversionRequest &request,
Segments::const_range segments_range,
Segments *segments) const;
bool ResizeSegments(const ConversionRequest &request,
Segments::const_iterator segments_begin,
absl::string_view key, Segments *segments) const;

// Returns a ResizeSegmentsRequest when the rewrite for AD needs the segments
// to be resized; returns std::nullopt otherwise.
std::optional<ResizeSegmentsRequest> CheckResizeSegmentsForAd(
const ConversionRequest &request, const Segments &segments,
size_t segment_index) const;

// When only one conversion segment has consecutive number characters,
// this function adds date and time candidates.
Expand All @@ -187,7 +189,6 @@ class DateRewriter : public RewriterInterface {
absl::string_view str,
std::vector<date_rewriter_internal::DateCandidate> *results);

const ConverterInterface *const parent_converter_ = nullptr;
const dictionary::DictionaryInterface *const dictionary_ = nullptr;
};

Expand Down
Loading

0 comments on commit 61db065

Please sign in to comment.