mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 16:26:59 +00:00
llama: update vendored code to commit 40c6d79f (#7875)
This commit is contained in:
35
llama/unicode.cpp
vendored
35
llama/unicode.cpp
vendored
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* llama.cpp - commit 3f1ae2e32cde00c39b96be6d01c2997c29bae555 - do not edit this file
|
||||
* llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
@@ -154,11 +154,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
|
||||
static std::vector<codepoint_flags> unicode_cpt_flags_array() {
|
||||
std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
|
||||
|
||||
assert (unicode_ranges_flags.front().first == 0);
|
||||
assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS);
|
||||
assert (unicode_ranges_flags.begin()[0].first == 0);
|
||||
assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
|
||||
for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
|
||||
const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags
|
||||
const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags
|
||||
const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
|
||||
const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
|
||||
for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
|
||||
cpt_flags[cpt] = range_ini.second;
|
||||
}
|
||||
@@ -247,7 +247,19 @@ static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
|
||||
free(wbuf);
|
||||
return ret;
|
||||
#else
|
||||
|
||||
#if defined(__clang__)
|
||||
// disable C++17 deprecation warning for std::codecvt_utf8
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
return conv.from_bytes(s);
|
||||
#endif
|
||||
}
|
||||
@@ -644,7 +656,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
|
||||
std::vector<uint32_t> result(cpts.size());
|
||||
for (size_t i = 0; i < cpts.size(); ++i) {
|
||||
const uint32_t cpt = cpts[i];
|
||||
auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1;
|
||||
auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
|
||||
result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
|
||||
}
|
||||
return result;
|
||||
@@ -686,8 +698,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
|
||||
}
|
||||
|
||||
uint32_t unicode_tolower(uint32_t cp) {
|
||||
auto it = unicode_map_lowercase.find(cp);
|
||||
return it == unicode_map_lowercase.end() ? cp : it->second;
|
||||
// binary search
|
||||
auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
|
||||
[](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
|
||||
return pair.first < value;
|
||||
});
|
||||
if (it != unicode_map_lowercase.end() && it->first == cp) {
|
||||
return it->second;
|
||||
}
|
||||
return cp; // Return the original code point if no lowercase mapping is found
|
||||
}
|
||||
|
||||
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
|
||||
|
||||
Reference in New Issue
Block a user