Skip to content

Commit e5be811

Browse files
committed
upmendex: Better treatment of diacritical marks in Danish, Norwegian, Icelandic, Finnish
git-svn-id: svn://tug.org/texlive/trunk/Build@75367 c570f23f-e606-0410-a88d-b1316a301751
1 parent a2ecdd5 commit e5be811

File tree

2 files changed

+122
-13
lines changed

2 files changed

+122
-13
lines changed

source/texk/upmendex/ChangeLog

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
2025-06-01 TANAKA Takuji <[email protected]>
2+
3+
* fwrite.c: Support digraph AA and
4+
better treatment of Ø,ø,Ö,ö,Æ,æ,Ä,ä in Danish and Norwegian.
5+
Better treatment of Ø,ø,Ö,ö,Æ,æ,Ä,ä in Icelandic.
6+
Better treatment of Ö,ö,Ø,ø,Ő,ő,Õ,õ,Ü,ü,Ű,ű,Œ,œ,Æ,æ,Ä,ä in Finnish.
7+
18
2025-05-29 TANAKA Takuji <[email protected]>
29

310
* version 1.20 Stable version.

source/texk/upmendex/fwrite.c

Lines changed: 115 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -675,10 +675,10 @@ static void crcheck(char *lbuff, FILE *fp)
675675
static void index_normalize(UChar *istr, UChar *ini, int *chset)
676676
{
677677
int k, hi, lo, mi;
678-
UChar ch,src[2],dest[8],strX[4],strY[4],strZ[4];
678+
UChar ch,src[2],dest[8],strX[4],strY[4],strZ[4],strW[4];
679679
UChar32 c32;
680680
UErrorCode perr;
681-
UCollationResult order,order1;
681+
UCollationResult order,order1,order2,order3,order4,order5;
682682
UCollationStrength strgth;
683683
static int i_y_mode=0,o_o_mode=0,u_u_mode=0,v_w_mode=0,s_s_mode=0,t_t_mode=0;
684684

@@ -1018,26 +1018,70 @@ static void index_normalize(UChar *istr, UChar *ini, int *chset)
10181018
ini[0] = 0x21A; return;
10191019
}
10201020
}
1021-
if (ch==0x0D6||ch==0x0F6||ch==0x150||ch==0x151) {
1022-
/* check Ö,ö versus Ő,ő for Hungarian */
1021+
if (ch==0x0D6||ch==0x0F6||ch==0x150||ch==0x151
1022+
||ch==0x0D8||ch==0x0F8||ch==0x0D5||ch==0x0F5) {
1023+
/* check Ö,ö versus Ő,ő for Hungarian
1024+
Ø,ø versus Ö,ö for Danish, Norwegian
1025+
Ö,ö versus Ø,ø,Ő,ő,Õ,õ for Finnish SFS 4600 */
10231026
if (o_o_mode==0) {
10241027
strgth = ucol_getStrength(icu_collator);
10251028
ucol_setStrength(icu_collator, UCOL_PRIMARY);
10261029
strX[0] = 0x0D6; strX[1] = 0x00; /* Ö */
1027-
strY[0] = 0x150; strY[1] = 0x00; /* Ő */
1030+
strY[0] = 0x0D8; strY[1] = 0x00; /* Ø */
10281031
strZ[0] = 0x04F; strZ[1] = 0x00; /* O */
1029-
order = ucol_strcoll(icu_collator, strY, -1, strX, -1);
1030-
order1 = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1031-
o_o_mode = (order==UCOL_EQUAL && order1!=UCOL_EQUAL) ? 2 : 1;
1032+
order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1033+
order1 = ucol_strcoll(icu_collator, strZ, -1, strY, -1);
1034+
if (order==UCOL_LESS || order1==UCOL_LESS) {
1035+
o_o_mode = 2;
1036+
} else {
1037+
o_o_mode = 1;
1038+
}
10321039
ucol_setStrength(icu_collator, strgth);
10331040
}
10341041
if (o_o_mode==2) {
1042+
strgth = ucol_getStrength(icu_collator);
1043+
ucol_setStrength(icu_collator, UCOL_SECONDARY);
1044+
strX[0] = 0x0D6; strX[1] = 0x00; /* Ö */
1045+
strY[0] = 0x0D8; strY[1] = 0x00; /* Ø */
1046+
strZ[0] = 0x150; strZ[1] = 0x00; /* Ő */
1047+
strW[0] = 0x0D5; strZ[1] = 0x00; /* Õ */
1048+
order2 = ucol_strcoll(icu_collator, strY, -1, strZ, -1);
1049+
order3 = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1050+
order4 = ucol_strcoll(icu_collator, strY, -1, strX, -1);
1051+
order5 = ucol_strcoll(icu_collator, strW, -1, strX, -1);
1052+
if (order1==UCOL_LESS && order4==UCOL_LESS) {
1053+
o_o_mode = 3; /* O < Ø << Ö */
1054+
if (order2==UCOL_LESS)
1055+
o_o_mode = 4; /* O < Ø << Ö and O < Ø << Ő */
1056+
} else if (order==UCOL_LESS && order4==UCOL_GREATER) {
1057+
o_o_mode = 6; /* O < Ö << Ø */
1058+
if (order3==UCOL_GREATER)
1059+
o_o_mode = 7; /* O < Ö << Ø and O < Ö << Ő */
1060+
if (order3==UCOL_GREATER && order5==UCOL_GREATER)
1061+
o_o_mode = 8; /* O < Ö << Ø and O < Ö << Ő and O < Ö << Õ */
1062+
} else if (order==UCOL_LESS && order3==UCOL_GREATER) {
1063+
o_o_mode = 5; /* O < Ö << Ő */
1064+
}
1065+
ucol_setStrength(icu_collator, strgth);
1066+
}
1067+
if ((o_o_mode==3 && (ch==0x0D6||ch==0x0F6)) || /* Ö */
1068+
(o_o_mode==4 && (ch==0x150||ch==0x151||ch==0x0D6||ch==0x0F6)) || /* Ö,Ő */
1069+
(o_o_mode>=3 && o_o_mode<=4 && (ch==0x0D8||ch==0x0F8))) { /* Ø */
1070+
ini[0] = 0x0D8; /* Ø */
1071+
return;
1072+
}
1073+
if ((o_o_mode==5 && (ch==0x150||ch==0x151)) || /* Ő */
1074+
(o_o_mode==6 && (ch==0x0D8||ch==0x0F8)) || /* Ø */
1075+
(o_o_mode==7 && (ch==0x150||ch==0x151||ch==0x0D8||ch==0x0F8)) || /* Ő,Ø */
1076+
(o_o_mode==8 && (ch==0x150||ch==0x151||
1077+
ch==0x0D8||ch==0x0F8||ch==0x0D5||ch==0x0F5)) || /* Ő,Ø,Õ */
1078+
(o_o_mode>=5 && o_o_mode<=8 && (ch==0x0D6||ch==0x0F6))) { /* Ö */
10351079
ini[0] = 0x0D6; /* Ö */
10361080
return;
10371081
}
10381082
}
10391083
if (ch==0x0DC||ch==0x0FC||ch==0x170||ch==0x171) {
1040-
/* check Ü,ü versus Ű,ű for Hungarian */
1084+
/* check Ü,ü versus Ű,ű for Hungarian, and for Finnish SFS 4600 */
10411085
if (u_u_mode==0) {
10421086
strgth = ucol_getStrength(icu_collator);
10431087
ucol_setStrength(icu_collator, UCOL_PRIMARY);
@@ -1046,22 +1090,33 @@ static void index_normalize(UChar *istr, UChar *ini, int *chset)
10461090
strZ[0] = 0x055; strZ[1] = 0x00; /* U */
10471091
order = ucol_strcoll(icu_collator, strY, -1, strX, -1);
10481092
order1 = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1049-
u_u_mode = (order==UCOL_EQUAL && order1!=UCOL_EQUAL) ? 2 : 1;
1093+
if (order==UCOL_EQUAL && order1!=UCOL_EQUAL) {
1094+
strZ[0] = 0x059; /* Y */
1095+
order1 = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1096+
u_u_mode = (order1==UCOL_EQUAL) ? 3 : 2;
1097+
} else {
1098+
u_u_mode = 1;
1099+
}
10501100
ucol_setStrength(icu_collator, strgth);
10511101
}
10521102
if (u_u_mode==2) {
10531103
ini[0] = 0x0DC; /* Ü */
10541104
return;
1105+
} else if (o_o_mode==3) {
1106+
ini[0] = 0x059; /* Y */
1107+
return;
10551108
}
10561109
}
10571110
if (ch==0x0C6||ch==0x0E6||ch==0x152||ch==0x153||ch==0x132||ch==0x133
10581111
||ch==0x0DF||ch==0x1E9E||ch==0x13F||ch==0x140||ch==0x149||ch==0x490||ch==0x491) {
10591112
strX[0] = u_toupper(ch); strX[1] = 0x00; /* ex. "Æ" "Œ" */
10601113
switch (ch) {
10611114
case 0x0C6: case 0x0E6: /* Æ æ */
1062-
strZ[0] = 0x41; break; /* A */
1115+
strZ[0] = 0x41; /* A */
1116+
strW[0] = 0xC4; break; /* Ä */
10631117
case 0x152: case 0x153: /* Œ œ */
1064-
strZ[0] = 0x4F; break; /* O */
1118+
strZ[0] = 0x4F; /* O */
1119+
strW[0] = 0xD6; break; /* Ö */
10651120
case 0x0DF: case 0x1E9E: /* ß ẞ */
10661121
strZ[0] = 0x53; break; /* S */
10671122
case 0x132: case 0x133: /* IJ ij */
@@ -1080,6 +1135,21 @@ static void index_normalize(UChar *istr, UChar *ini, int *chset)
10801135
strZ[2] = 0x00; /* ex. "AZ" "OZ" "ГЯ" */
10811136
order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
10821137
if (order==UCOL_GREATER) { ini[0]=strZ[0]; return; } /* not ligature */
1138+
1139+
if (ch==0x0C6||ch==0x0E6||ch==0x152||ch==0x153) {
1140+
/* check Æ,Œ versus Ä,Ö for Finnish */
1141+
strW[1] = 0x00;
1142+
strgth = ucol_getStrength(icu_collator);
1143+
ucol_setStrength(icu_collator, UCOL_PRIMARY);
1144+
order = ucol_strcoll(icu_collator, strW, -1, strX, -1);
1145+
ucol_setStrength(icu_collator, UCOL_SECONDARY);
1146+
order1 = ucol_strcoll(icu_collator, strW, -1, strX, -1);
1147+
strgth = ucol_getStrength(icu_collator);
1148+
if (order==UCOL_EQUAL) {
1149+
ini[0] = (order1==UCOL_GREATER) ? strX[0] : strW[0];
1150+
return;
1151+
}
1152+
}
10831153
}
10841154
else if ((is_latin(&ch)&&ch>0x7F)||
10851155
(is_cyrillic(&ch)&&(ch<0x410||ch==0x419||ch==0x439||ch>0x44F))||
@@ -1094,7 +1164,20 @@ static void index_normalize(UChar *istr, UChar *ini, int *chset)
10941164
strZ[0] = u_toupper(dest[0]); strZ[2] = 0x00; /* ex. "AZ" */
10951165
strX[0] = u_toupper(ch); strX[1] = 0x00; /* ex. "Å" */
10961166
order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1097-
if (order==UCOL_LESS) { ini[0]=strX[0]; return; } /* with diacritic */
1167+
if (order==UCOL_LESS) { /* with diacritic */
1168+
if (strX[0]!=0xC4) { /* Ä */
1169+
ini[0]=strX[0]; return;
1170+
}
1171+
strZ[0] = 0x0C6; strZ[1] = 0x00; /* Æ */
1172+
strgth = ucol_getStrength(icu_collator);
1173+
ucol_setStrength(icu_collator, UCOL_PRIMARY);
1174+
order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1175+
ucol_setStrength(icu_collator, UCOL_SECONDARY);
1176+
order1 = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1177+
strgth = ucol_getStrength(icu_collator);
1178+
ini[0] = (order==UCOL_EQUAL && order1==UCOL_LESS) ? strZ[0] : strX[0];
1179+
return;
1180+
}
10981181
ch=dest[0]; /* without diacritic */
10991182
}
11001183
}
@@ -1151,6 +1234,25 @@ static void index_normalize(UChar *istr, UChar *ini, int *chset)
11511234
return;
11521235
}
11531236
}
1237+
/* AA for Norwegian, Danish */
1238+
if (strX[0]==0x41 && strX[1]==0x41) { /* AA */
1239+
strX[2]=L'\0';
1240+
strY[0]=0xC5; strY[1]=L'\0'; /* Å */
1241+
strZ[0]=0x41; strZ[1]=0x42; strZ[3]=L'\0'; /* AB */
1242+
order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
1243+
ucol_setStrength(icu_collator, UCOL_PRIMARY);
1244+
order1 = ucol_strcoll(icu_collator, strY, -1, strX, -1);
1245+
strgth = ucol_getStrength(icu_collator);
1246+
if (order==UCOL_LESS) {
1247+
if (order1==UCOL_EQUAL) {
1248+
ini[0]=strY[0]; ini[1]=L'\0'; /* Å */
1249+
} else {
1250+
ini[0]=strX[0]; ini[1]=strX[1]; /* AA */
1251+
ini[2]=L'\0';
1252+
}
1253+
return;
1254+
}
1255+
}
11541256
/* other digraphs */
11551257
if(((strX[0]==0x43 || strX[0]==0x44 || strX[0]==0x50 || strX[0]==0x52 || strX[0]==0x53 || strX[0]==0x54 ||
11561258
strX[0]==0x58 || strX[0]==0x5A) && strX[1]==0x48) || /* CH DH PH RH SH TH XH ZH */

0 commit comments

Comments
 (0)