œ_#ÁÕ§TE NAŒ“KeÉ:”(åŽÖJÞùY’‚ñùž7; «]Û ý`8g“¯B© jdÖÖ¸ðzœ¸¦4Ç3Kó^(ÍÖ¼ Õ€pvìwšõB4df$Èü^0˜…åÌC$#2FŽÑ§±¦ÛZ/÷š&m£ñzÒÖ ’.Î]!Î;ƒ(Õ–¢d/—#Kª+tZyuÏB>NÛÖ†(¸ŒSà'³„Y˜´-_•¦¼´˜OlNK§¶ÒàŠˆTHµƒeTPå·fïM’…þuÏÍüp6دªE£åü‡ZØ'CKF#â«;‹eyO Qp„†l"ö1èíÙP ÏŒúl! BÝ2ñª•_VÁÉ÷3eu`–F¸ìI--ö<¿žë¯4õ캿¢)34Å{wMÉ2ÆÖFŸ¥`e9Ú¶¸P‡.”FÔï rY ‚²ÈTB,{ÛœéJ}«àQ4¹0Rû4D‚B§S‘ dO•v¾„™Sן¯3FeŸ™«+ÓâwH dÕÛÌì·P4ë&¥#rÜÉ Ù¦ê†ý·xòqk¯2,¹§™E\ék‚×Sá”ÚºÙ⺷ö£6…à ʾ qSá³Å|;àû}4Ÿ($â¹VY~óÍ!èÜÒŒËX½Ù1j‚VíÍŸš³+œ]«½g{_{/vµ½\¢¶vÉWKÿ:ñám½ ¥ S²x‘t ŽšÝÙÿÀÇ^ný PK IW™k‚½÷ á _rels/.relsUT dìd dìd dìd’ÏNÃ0‡ï{ŠÈ÷ÕÝ@¡¥» ¤Ý*`%îÑ&QâÁöö‚J£ì°cœŸ¿|¶²ÙÆA½rL½wVE Šñ¶w†çúay * 9Kƒw¬áÈ ¶ÕbóÄIîI]’Ê—4t"á1™ŽGJ…ìòMããH’±Å@æ…ZÆuYÞ`üÍ€jÂT;«!îì T}|Û7MoøÞ›ýÈNN<|v–í2ÄÜ¥ÏèšbË¢Ázó˜Ë )„"£OÏ7ú{ZYÈ’yÞç#1'tuÉM?6o>Z´_å9›ëKÚ˜}?þ³žÏÌ·N>fµx PK IWª½e ¢ U € word/document.xmlUT dìdPK IWþË3” z €J¢ word/settings.xmlUT dìdPK IWC‡{š' ƒ €¤ docProps/custom.xmlUT dìdPK IW츱=Œ €‡¥ [Content_Types].xmlUT dìdPK IWV%ë±" €U§ docProps/app.xmlUT dìdPK IW€RŒ 3 €¶¨ docProps/core.xmlUT dìdPK IWkòDn ô €ª word/_rels/document.xml.relsUT dìdPK IW;$î €Î« word/fontTable.xmlUT dìdPK IW+åäz] ÷. €ý¬ word/numbering.xmlUT dìdPK IW¤2×r- ¿ €›° word/styles.xmlUT dìdPK IWMFÒ ø €´ word/header1.xmlUT dìdPK IWF— T e €· word/media/image1.jpegUT dìdPK IW!Yéáå €°Ë word/media/image2.pngUT dìdPK IW°Àºë ú €ÙÌ word/media/image3.pngUT dìdPK IW$“†ª L €Î word/footer1.xmlUT dìdPK IWzaGôM €ñÑ word/footer2.xmlUT dìdPK IW–µâº P €}Õ word/theme/theme1.xmlUT dìdPK IW™k‚½÷ á €{Û _rels/.relsUT PK ! bîh^ [Content_Types].xml ¢( ¬”ËNÃ0E÷HüCä-Jܲ@5í‚Ç*Q>Àēƪc[žiiÿž‰ûB¡j7±ÏÜ{2ñÍh²nm¶‚ˆÆ»R‹ÈÀU^7/ÅÇì%¿’rZYï @1__f› ˜q·ÃR4DáAJ¬h>€ãÚÇV߯¹ªZ¨9ÈÛÁàNVÞ8Ê©ÓãÑÔji){^óã-I‹"{Üv^¥P!XS)bR¹rú—K¾s(¸3Õ`cÞ0†½ÝÎß»¾7M4²©ŠôªZÆk+¿|\|z¿(Ž‹ôPúº6h_-[ž@!‚ÒØ Pk‹´2nÏ}Ä?£LËð Ýû%áÄßdºždN"m,à¥ÇžDO97*‚~§Èɸ8ÀOíc|n¦Ñ äEøÿöéºóÀBÉÀ!$}‡íàÈé;{ìÐå[ƒîñ–é2þ ÿÿ PK ! µU0#ô L _rels/.rels ¢( ¬’MOÃ0†ïHü‡È÷ÕÝBKwAH»!T~€Iܵ£$Ý¿'TƒG½~üÊÛÝ<êÈ!öâ4¬‹;#¶w†—úqu *&r–Fq¬áÄvÕõÕö™GJy(v½*«¸¨¡KÉß#FÓñD±Ï.W ¥†=™ZÆMYÞbø®ÕBS톰·7 ê“Ï›×–¦é ?ˆ9LìÒ™ÈsbgÙ®|Èl!õùUSh9i°bžr:"y_dlÀóD›¿ý|-NœÈR"4ø2ÏGÇ% õZ´4ñËyÄ7 ëÈðÉ‚‹¨Þ ÿÿ PK ! Q48wÛ — xl/workbook.xml¤UÙnâ0}iþ!cñ‡ *–¢AšVU×$dC¬&vÆv UÕŸë@XÊK§/¹p|Žï¹N÷b“¥Ö •Š ÞC¸î"‹òHÄŒ¯zèá~b·‘¥4á1I§=ôJºèÿüÑ] ù¼âÙ ®z(Ñ:GE ͈ª‹œrˆ,…̈†©\9*—”Ä*¡Tg©ã¹nàd„q´Eåg0ÄrÉ":Q‘Q®· ’¦D}•°\UhYô¸ŒÈç"·#‘å ±`)Ó¯%(²²(œ®¸d‘‚ì nZ w v¡ñª• t¶TÆ")”Xê:@;[Ògú±ë`|²›ó=ø’ïHúÂL÷¬dðEVÁ+8€a÷Ûh¬Uz%„Íû"ZsÏÍCýî’¥ôqk]‹äù5ÉL¦Rd¥Dé˘i÷P ¦bM/|dÉ",…¨çãFNoçiûéë>aêiçsó#ðÄ ÕTr¢éHp ÜIú®ÝJìQ"ÀÜÖ-ý[0I¡¦ÀZ Z…d¡nˆN¬B¦=4 g %PDF-1.4 %âãÏÓ 3 0 obj << /Linearized 1 /L 422775 ÿØÿà JFIF ÿÛ C ÿÛ C ÿÀ X" ÿÄ ÿÄ H !1A"Qaq2‘¡#±ÁBRÑ3Cbrá$S‚¢²ð4ñ%6DTc’ÂsÿÄ ÿÄ = !1AQ"aq‘Á2R¡±BÑð#3br’²4á$‚¢ÂñÿÚ ? áHBßÝ`„! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! ! stream
#ifndef UNF_NORMALIZER_HH
#define UNF_NORMALIZER_HH
#include <vector>
#include <string>
#include <algorithm>
#include <cstring>
#include "trie/searcher.hh"
#include "trie/char_stream.hh"
#include "table.hh"
#include "util.hh"
namespace UNF {
class Normalizer {
public:
enum Form { FORM_NFD, FORM_NFC, FORM_NFKD, FORM_NFKC };
public:
Normalizer()
: nf_d(TABLE::NODES, TABLE::CANONICAL_DECOM_ROOT, (const char *)TABLE::STRINGS),
nf_kd(TABLE::NODES, TABLE::COMPATIBILITY_DECOM_ROOT, (const char *)TABLE::STRINGS),
nf_c(TABLE::NODES, TABLE::CANONICAL_COM_ROOT, (const char *)TABLE::STRINGS),
nf_c_qc(TABLE::NODES, TABLE::NFC_ILLEGAL_ROOT),
nf_kc_qc(TABLE::NODES, TABLE::NFKC_ILLEGAL_ROOT),
ccc(TABLE::NODES, TABLE::CANONICAL_CLASS_ROOT)
{}
const char* normalize(const char* src, Form form) {
switch(form) {
case FORM_NFD: return nfd(src);
case FORM_NFC: return nfc(src);
case FORM_NFKD: return nfkd(src);
case FORM_NFKC: return nfkc(src);
default: return src;
}
}
const char* nfd(const char* src) { return decompose(src, nf_d); }
const char* nfkd(const char* src) { return decompose(src, nf_kd); }
const char* nfc(const char* src) { return compose(src, nf_c_qc, nf_d); }
const char* nfkc(const char* src) { return compose(src, nf_kc_qc, nf_kd); }
private:
const char* decompose(const char* src, const Trie::NormalizationForm& nf) {
const char* beg = next_invalid_char(src, nf);
if(*beg=='\0')
return src;
buffer.assign(src, beg);
do {
const char* end = next_valid_starter(beg, nf);
decompose_one(beg, end, nf, buffer);
beg = next_invalid_char(end, nf);
buffer.append(end, beg);
} while(*beg!='\0');
return buffer.c_str();
}
void decompose_one(const char* beg, const char* end, const Trie::NormalizationForm& nf, std::string& buf) {
unsigned last = buf.size();
nf.decompose(Trie::RangeCharStream(beg,end), buf);
char* bufbeg = const_cast<char*>(buf.data());
canonical_combining_class_ordering(bufbeg+last, bufbeg+buf.size());
}
const char* compose(const char* src, const Trie::NormalizationForm& nf, const Trie::NormalizationForm& nf_decomp) {
const char* beg = next_invalid_char(src, nf);
if(*beg=='\0')
return src;
buffer.assign(src, beg);
while(*beg!='\0') {
const char* end = next_valid_starter(beg, nf);
buffer2.clear();
decompose_one(beg, end, nf_decomp, buffer2);
end = compose_one(buffer2.c_str(), end, buffer);
beg = next_invalid_char(end, nf);
buffer.append(end, beg);
}
return buffer.c_str();
}
const char* compose_one(const char* starter, const char* rest_starter, std::string& buf) {
Trie::CharStreamForComposition in(starter, rest_starter, canonical_classes, buffer3);
while(in.within_first())
nf_c.compose(in, buf);
return in.cur();
}
void canonical_combining_class_ordering(char* beg, const char* end) {
canonical_classes.assign(end-beg+1, 0); // +1 is for sentinel value
ccc.sort(beg, canonical_classes);
}
const char* next_invalid_char(const char* src, const Trie::NormalizationForm& nf) const {
int last_canonical_class = 0;
const char* cur = Util::nearest_utf8_char_start_point(src);
const char* starter = cur;
for(; *cur != '\0'; cur = Util::nearest_utf8_char_start_point(cur+1)) {
int canonical_class = ccc.get_class(cur);
if(last_canonical_class > canonical_class && canonical_class != 0)
return starter;
if(nf.quick_check(cur)==false)
return starter;
if(canonical_class==0)
starter=cur;
last_canonical_class = canonical_class;
}
return cur;
}
const char* next_valid_starter(const char* src, const Trie::NormalizationForm& nf) const {
const char* cur = Util::nearest_utf8_char_start_point(src+1);
while(ccc.get_class(cur)!=0 || nf.quick_check(cur)==false)
cur = Util::nearest_utf8_char_start_point(cur+1);
return cur;
}
private:
const Trie::NormalizationForm nf_d;
const Trie::NormalizationForm nf_kd;
const Trie::NormalizationForm nf_c;
const Trie::NormalizationForm nf_c_qc;
const Trie::NormalizationForm nf_kc_qc;
const Trie::CanonicalCombiningClass ccc;
std::string buffer;
std::string buffer2;
std::string buffer3;
std::vector<unsigned char> canonical_classes;
};
}
#endif