Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/libime/core/historybigram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ struct WeightedTrie {
auto v = trie_.traverse(wordAndCode.first, pos);
if (TrieType::isValid(v)) {
result += v;
} else if (TrieType::isNoPath(v)) {
return 0;
}
const char separator[] = {wordCodeSeparator, '\0'};
v = trie_.traverse(separator, pos);
Expand Down Expand Up @@ -395,6 +397,32 @@ class HistoryBigramPool {
bigram_.fillPredict(words, word, maxSize);
}

    // Try to merge `newSentence` into the most recently recorded sentence.
    //
    // Succeeds only when `context` equals the tail of the latest sentence
    // (recent_.front()); in that case every word of `newSentence` is moved
    // onto that sentence and the unigram/bigram counts are updated,
    // including re-pointing the sentence-end ("</s>") bigram to the new
    // last word. Returns true on success. On failure (empty inputs or a
    // context mismatch) it returns before the move loop, so `newSentence`
    // is left untouched and the caller can record it as a standalone
    // sentence instead.
    bool maybeAppendToLatestSentence(const std::vector<WordWithCode> &context,
                                     std::vector<WordWithCode> &newSentence) {
        if (recent_.empty() || newSentence.empty()) {
            return false;
        }
        // NOTE(review): assumes recent_.front() is the most recently added
        // sentence — confirm against where this pool pushes new sentences.
        auto &latestSentence = recent_.front();
        // The context must match the last context.size() words of the
        // latest sentence exactly (word and code both compared).
        if (latestSentence.size() < context.size() ||
            !std::ranges::equal(
                context,
                std::views::drop(latestSentence,
                                 latestSentence.size() - context.size()))) {
            return false;
        }

        const int delta = 1;
        // The latest sentence no longer ends at its old last word: remove
        // the old (last word -> </s>) bigram before extending it.
        decBigram(latestSentence.back(), {"</s>", ""}, delta);
        for (auto &item : newSentence) {
            unigram_.incFreq(item, delta);
            // Chain each appended word to the current sentence tail.
            incBigram(latestSentence.back(), item, delta);
            latestSentence.push_back(std::move(item));
        }
        // Re-add the sentence-end bigram for the new last word.
        incBigram(latestSentence.back(), {"</s>", ""}, delta);

        return true;
    }

private:
template <typename R>
void remove(const R &sentence) {
Expand Down Expand Up @@ -742,4 +770,13 @@ float HistoryBigram::scoreWithCode(
{cur ? cur->word() : "", extractor && cur ? extractor(cur) : ""});
}

/// Record `newSentence` in the history, taking `context` into account.
///
/// When `context` is non-empty and matches the tail of the most recent
/// sentence in the primary pool, the new words are appended to that
/// sentence instead of starting a fresh one; otherwise the sentence is
/// added on its own via addWithCode().
void HistoryBigram::addWithContext(const std::vector<WordWithCode> &context,
                                   std::vector<WordWithCode> newSentence) {
    FCITX_D();
    // Short-circuit: only attempt the merge when a context is given.
    const bool appended =
        !context.empty() &&
        d->pools_[0].maybeAppendToLatestSentence(context, newSentence);
    if (!appended) {
        // Merge failed (or no context): newSentence was left untouched,
        // so record it as a standalone sentence.
        addWithCode(newSentence);
    }
}

} // namespace libime
3 changes: 3 additions & 0 deletions src/libime/core/historybigram.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ class LIBIMECORE_EXPORT HistoryBigram {
int32_t rawBigramFrequency(WordWithCodeView prev,
WordWithCodeView cur) const;

void addWithContext(const std::vector<WordWithCode> &context,
std::vector<WordWithCode> newSentence);

private:
std::unique_ptr<HistoryBigramPrivate> d_ptr;
FCITX_DECLARE_PRIVATE(HistoryBigram);
Expand Down
37 changes: 18 additions & 19 deletions src/libime/pinyin/pinyincontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,10 +919,10 @@ std::vector<std::string> PinyinContext::selectedWords() const {
return newSentence;
}

std::vector<std::pair<std::string, std::string>>
std::vector<HistoryBigram::WordWithCode>
PinyinContext::selectedWordsWithPinyin() const {
FCITX_D();
std::vector<std::pair<std::string, std::string>> newSentence;
std::vector<HistoryBigram::WordWithCode> newSentence;
for (const auto &s : d->selected_) {
for (const auto &item : s) {
if (item.type_ != SelectedPinyinType::Separator) {
Expand Down Expand Up @@ -976,31 +976,30 @@ void PinyinContext::learn() {
return;
}

std::vector<HistoryBigram::WordWithCode> newSentence;
if (auto [result, encodedWordPinyin] = d->learnWord();
result != LearnWordResult::Ignored) {
// Do not insert custom to history for the first time.
if (result == LearnWordResult::Normal) {
// Create new sentence with the whole new learned word.
std::vector<HistoryBigram::WordWithCode> newSentence{
{sentence(), encodedWordPinyin}};
d->ime_->model()->history().addWithCode(newSentence);
newSentence.push_back({sentence(), encodedWordPinyin});
} else {
return;
}
} else {
std::vector<HistoryBigram::WordWithCode> newSentence;
for (auto &s : d->selected_) {
for (auto &item : s) {
if (item.type_ != SelectedPinyinType::Separator) {
// Non pinyin word. Skip it.
if (item.encodedPinyin().empty()) {
return;
}
newSentence.push_back(
{item.word_.word(), item.encodedPinyin()});
}
}
}
d->ime_->model()->history().addWithCode(newSentence);
newSentence = selectedWordsWithPinyin();
}

if (std::ranges::any_of(newSentence, [](const auto &word) {
return word.second.empty();
})) {
// Don't add to history if there is any non-pinyin word.
return;
}

auto context = contextWordsWithPinyin();
d->ime_->model()->history().addWithContext(contextWordsWithPinyin(),
std::move(newSentence));
}

void PinyinContext::setContextWords(
Expand Down
18 changes: 8 additions & 10 deletions src/libime/pinyin/pinyinprediction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ PinyinPrediction::predict(const State &state,

if (lastEncodedPinyin.empty() || sentence.empty()) {
auto result = Prediction::predictWithScore(state, sentence, maxSize);
std::transform(result.begin(), result.end(),
std::back_inserter(finalResult),
[](std::pair<std::string, float> &value) {
return std::make_pair(std::move(value.first),
PinyinPredictionSource::Model);
});
std::ranges::transform(result, std::back_inserter(finalResult),
[](std::pair<std::string, float> &value) {
return std::make_pair(
std::move(value.first),
PinyinPredictionSource::Model);
});
return finalResult;
}

Expand Down Expand Up @@ -119,11 +119,9 @@ PinyinPrediction::predict(const State &state,

dup.insert(std::get<std::string>(newItem));
intermedidateResult.push_back(std::move(newItem));
std::push_heap(intermedidateResult.begin(),
intermedidateResult.end(), cmp);
std::ranges::push_heap(intermedidateResult, cmp);
while (intermedidateResult.size() > maxSize) {
std::pop_heap(intermedidateResult.begin(),
intermedidateResult.end(), cmp);
std::ranges::pop_heap(intermedidateResult, cmp);
dup.erase(
std::get<std::string>(intermedidateResult.back()));
intermedidateResult.pop_back();
Expand Down
20 changes: 20 additions & 0 deletions test/testhistorybigram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <string>
#include <unordered_set>
#include <fcitx-utils/log.h>
#include <fcitx-utils/stringutils.h>
#include "libime/core/historybigram.h"

namespace {
Expand Down Expand Up @@ -266,6 +267,24 @@ void testWithCodePredict() {
}
}

// Verify HistoryBigram::addWithContext: a matching context extends the
// latest sentence in place, while a non-matching context starts a new one.
void testAppend() {
    using namespace libime;
    HistoryBigram history;
    // Seed the history with one three-word sentence.
    history.addWithCode({{"你", "code1"}, {"是", "code2"}, {"一个", "code3"}});

    // Context matches the tail of the seeded sentence -> should append.
    history.addWithContext({{"是", "code2"}, {"一个", "code3"}},
                           {{"好人", "code4"}});

    // Context does not match anything -> should become its own sentence.
    history.addWithContext({{"不是", "code5"}}, {{"你的", "code6"}});

    std::stringstream dumpStream;
    history.dump(dumpStream);
    const auto lines = fcitx::stringutils::split(dumpStream.str(), "\n");
    // Expect exactly two sentences: the newest one first, then the
    // original sentence extended with the appended word.
    FCITX_ASSERT(lines.size() == 2) << lines.size();
    FCITX_ASSERT(lines[0] == "你的\tcode6") << lines[0];
    FCITX_ASSERT(lines[1] == "你\tcode1 是\tcode2 一个\tcode3 好人\tcode4")
        << lines[1];
}

} // namespace

int main() {
Expand All @@ -276,5 +295,6 @@ int main() {
testSaveAndLoadText();
testWithCode();
testWithCodePredict();
testAppend();
return 0;
}
20 changes: 14 additions & 6 deletions test/testpinyinprediction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "libime/core/userlanguagemodel.h"
#include "libime/pinyin/pinyindictionary.h"
#include "libime/pinyin/pinyinencoder.h"
#include "libime/pinyin/pinyinime.h"
#include "libime/pinyin/pinyinprediction.h"
#include "testdir.h"

Expand All @@ -33,14 +34,21 @@ LogMessageBuilder &operator<<(LogMessageBuilder &log,
} // namespace fcitx

int main() {
UserLanguageModel model(LIBIME_BINARY_DIR "/data/sc.lm");
PinyinDictionary dict;
dict.load(PinyinDictionary::SystemDict,
LIBIME_BINARY_DIR "/data/dict_sc.txt", PinyinDictFormat::Text);
PinyinIME ime(
std::make_unique<PinyinDictionary>(),
std::make_unique<UserLanguageModel>(LIBIME_BINARY_DIR "/data/sc.lm"));
ime.setNBest(2);
ime.dict()->load(PinyinDictionary::SystemDict,
LIBIME_BINARY_DIR "/data/sc.dict",
PinyinDictFormat::Binary);
ime.model()->history().addWithCode({{"可", "JF"}});
auto &model = *ime.model();
auto &dict = *ime.dict();

PinyinPrediction prediction;
prediction.setUserLanguageModel(&model);
prediction.setPinyinDictionary(&dict);
prediction.setUserLanguageModel(ime.model());
prediction.setPinyinDictionary(ime.dict());

auto py = PinyinEncoder::encodeFullPinyin("zhong'guo");
auto result = prediction.predict(model.nullState(), {"我", "喜欢", "中国"},
{py.data(), py.size()}, 20);
Expand Down
Loading