#include <stdlib.h> /* malloc, free, NULL */
#include <string.h> /* memcpy, memset */

/* Short names for the 8-bit and 16-bit unsigned types used in this file. */
typedef unsigned char u8;
typedef unsigned short u16;

/*
 * NOTE: BOOL, TRUE and FALSE are used by the functions below but are not
 * defined in this file; they have to be supplied by the build environment.
 */

/*
 * Position of a letter inside a run of Arabic letters, as passed to
 * ArabicEncode().  The values are also the offsets ArabicEncode() adds to a
 * letter's isolated presentation-form code point to select the matching
 * form, so they must not be renumbered.
 */
#define ARABIC_SIN 0 /* single letter, stands alone */
#define ARABIC_END 1 /* last letter of the run */
#define ARABIC_1ST 2 /* first letter of the run */
#define ARABIC_MID 3 /* letter inside the run */
008 |
/*
 * Looks up the isolated presentation form of a base Arabic letter.
 *
 * ch       - code point to look up.
 * isolated - receives the isolated-form code point of the letter.
 * twoForms - receives TRUE for letters that only have an isolated and a
 *            final form (they never connect to the following letter) and
 *            FALSE for letters that have all four forms.
 *
 * Returns TRUE when ch is in the table.  Returns FALSE otherwise, in which
 * case the two outputs are left untouched.
 */
static BOOL ArabicIsolatedForm(u16 ch, u16 *isolated, BOOL *twoForms)
{
    u16 iso;
    BOOL two;

    switch (ch)
    {
        /* letters with two forms: isolated, final */
        case 0x0622: iso = 0xFE81; two = TRUE; break;
        case 0x0623: iso = 0xFE83; two = TRUE; break;
        case 0x0624: iso = 0xFE85; two = TRUE; break;
        case 0x0625: iso = 0xFE87; two = TRUE; break;
        case 0x0627: iso = 0xFE8D; two = TRUE; break;
        case 0x0629: iso = 0xFE93; two = TRUE; break;
        case 0x062F: iso = 0xFEA9; two = TRUE; break;
        case 0x0630: iso = 0xFEAB; two = TRUE; break;
        case 0x0631: iso = 0xFEAD; two = TRUE; break;
        case 0x0632: iso = 0xFEAF; two = TRUE; break;
        case 0x0648: iso = 0xFEED; two = TRUE; break;
        case 0x0649: iso = 0xFEEF; two = TRUE; break;

        /* letters with four forms: isolated, final, initial, medial */
        case 0x0626: iso = 0xFE89; two = FALSE; break;
        case 0x0628: iso = 0xFE8F; two = FALSE; break;
        case 0x062A: iso = 0xFE95; two = FALSE; break;
        case 0x062B: iso = 0xFE99; two = FALSE; break;
        case 0x062C: iso = 0xFE9D; two = FALSE; break;
        case 0x062D: iso = 0xFEA1; two = FALSE; break;
        case 0x062E: iso = 0xFEA5; two = FALSE; break;
        case 0x0633: iso = 0xFEB1; two = FALSE; break;
        case 0x0634: iso = 0xFEB5; two = FALSE; break;
        case 0x0635: iso = 0xFEB9; two = FALSE; break;
        case 0x0636: iso = 0xFEBD; two = FALSE; break;
        case 0x0637: iso = 0xFEC1; two = FALSE; break;
        case 0x0638: iso = 0xFEC5; two = FALSE; break;
        case 0x0639: iso = 0xFEC9; two = FALSE; break;
        case 0x063A: iso = 0xFECD; two = FALSE; break;
        case 0x0641: iso = 0xFED1; two = FALSE; break;
        case 0x0642: iso = 0xFED5; two = FALSE; break;
        case 0x0643: iso = 0xFED9; two = FALSE; break;
        case 0x0644: iso = 0xFEDD; two = FALSE; break;
        case 0x0645: iso = 0xFEE1; two = FALSE; break;
        case 0x0646: iso = 0xFEE5; two = FALSE; break;
        case 0x0647: iso = 0xFEE9; two = FALSE; break;
        case 0x064A: iso = 0xFEF1; two = FALSE; break;

        default: return FALSE;
    }

    *isolated = iso;
    *twoForms = two;
    return TRUE;
}

/*
 * Returns the code point to display for the Arabic letter at *arab, given
 * its position inside the word.
 *
 * arab   - pointer to the letter to encode.  When pos is neither ARABIC_END
 *          nor ARABIC_SIN and *arab is lam (U+0644), arab[1] is read as
 *          well, so it must be readable in that case.
 * pos    - ARABIC_SIN, ARABIC_END, ARABIC_1ST or ARABIC_MID.
 * stepit - set to TRUE when lam and the following alef variant were merged
 *          into one lam-alef ligature, i.e. the caller has to skip arab[1].
 *          It is never reset to FALSE here; the caller clears it.
 * is2set - in:  TRUE when the previous letter does not connect to this one.
 *          out: TRUE when this letter does not connect to the next one.
 *
 * Letters that do not connect on either side are returned as their base
 * code point; connected forms are returned as presentation forms.  Code
 * points that are not in the letter table are returned unchanged.
 */
u16 ArabicEncode(u16 *arab, u8 pos, BOOL *stepit, BOOL *is2set)
{
    u16 ucode = 0;
    BOOL nowChar = FALSE; /* TRUE: two-form letter, does not connect forward */

    if (ArabicIsolatedForm(*arab, &ucode, &nowChar) != TRUE)
    {
        /*
         * Not a letter from the table.  Previously this fell through with
         * ucode == 0 and returned 0x0001..0x0003; pass the code point
         * through instead.
         */
        if (*arab >= 0x064B && *arab <= 0x0652)
        {
            /* vowel marks sit on the previous letter and do not affect joining */
            return *arab;
        }
        /* tatweel (U+0640) connects to the next letter, anything else breaks the word */
        *is2set = (*arab == 0x0640) ? FALSE : TRUE;
        return *arab;
    }

    /* lam followed by an alef variant is drawn as one lam-alef ligature */
    if ((*arab == 0x0644) && (pos != ARABIC_END) && (pos != ARABIC_SIN))
    {
        /* local flag: a stale TRUE in *stepit must not trigger this path */
        BOOL ligature = TRUE;

        switch (arab[1])
        {
            case 0x0622: ucode = 0xFEF5; break;
            case 0x0623: ucode = 0xFEF7; break;
            case 0x0625: ucode = 0xFEF9; break;
            case 0x0627: ucode = 0xFEFB; break;
            default: ligature = FALSE; break;
        }

        if (ligature == TRUE)
        {
            *stepit = TRUE;

            /*
             * The final form (+1) is only right when the previous letter
             * connects to the lam.  At the start of a word nothing does, so
             * the isolated form is kept.
             */
            if ((pos == ARABIC_MID) && (*is2set != TRUE))
            {
                ucode = (u16)(ucode + ARABIC_END);
            }

            /* the alef half of the ligature never connects forward */
            *is2set = TRUE;
            return ucode;
        }
    }

    switch (pos)
    {
        case ARABIC_1ST:
            if (nowChar == TRUE)
            {
                ucode = *arab;
            }
            else
            {
                ucode = (u16)(ucode + ARABIC_1ST);
            }
            break;

        case ARABIC_MID:
            if (*is2set == TRUE)
            {
                /* previous letter does not connect: behaves like a first letter */
                if (nowChar == TRUE)
                {
                    ucode = *arab;
                }
                else
                {
                    ucode = (u16)(ucode + ARABIC_1ST);
                }
            }
            else if (nowChar == TRUE)
            {
                ucode = (u16)(ucode + ARABIC_END);
            }
            else
            {
                ucode = (u16)(ucode + ARABIC_MID);
            }
            break;

        case ARABIC_END:
            /* same rule for two-form and four-form letters */
            if (*is2set == TRUE)
            {
                ucode = *arab;
            }
            else
            {
                ucode = (u16)(ucode + ARABIC_END);
            }
            break;

        default:
            /* ARABIC_SIN and unknown positions: isolated presentation form */
            break;
    }

    *is2set = nowChar;
    return ucode;
}
119 |
/*
 * Reports whether a string of 16-bit characters contains at least one
 * character in the range U+060C..U+064B.
 *
 * pu16String   - characters to scan; a null pointer is treated as an empty
 *                string.
 * u16StrLength - number of characters to scan.
 *
 * Returns TRUE as soon as one character in the range is found, FALSE when
 * none of the first u16StrLength characters is in the range.
 */
BOOL isarabicstr(u16 *pu16String, u16 u16StrLength)
{
    u16 index;

    if (!pu16String)
    {
        return FALSE;
    }

    for (index = 0; index < u16StrLength; index++)
    {
        if (pu16String[index] >= 0x060C && pu16String[index] <= 0x064B)
        {
            return TRUE;
        }
    }

    return FALSE;
}
132 |
/*
 * Builds the display version of a string that contains Arabic text.
 *
 * The source is walked from left to right and written into the output from
 * right to left, so the overall character order is reversed:
 *   - runs of characters >= U+060C are treated as Arabic words; each letter
 *     is replaced by the form ArabicEncode() returns for its position, and a
 *     lam-alef pair collapses into one ligature character;
 *   - a space is copied as is;
 *   - runs of characters in 33..254 keep their internal order;
 *   - any other character is copied on its own.
 *
 * target_str - receives the result.  Must have room for u16Length + 1
 *              characters.  The result is zero-terminated; when ligatures
 *              shortened the text, the freed tail is zero-filled.
 * src_str    - text to convert.  NOTE(review): ArabicEncode() may look at
 *              letters after the one it encodes, so this should be a
 *              terminated string that is readable at index u16Length.
 * u16Length  - number of characters in src_str to convert.
 *
 * Does nothing when either pointer is null.  When the work buffer cannot be
 * allocated, src_str is copied to target_str unchanged.
 */
void Arabic_automata(u16 *target_str, u16 *src_str,u16 u16Length)
{
    BOOL b8ArabSpe = FALSE; /* ArabicEncode() merged two letters into a ligature */
    BOOL b8is2set = FALSE;  /* previous letter does not connect to the next one */
    u16 u16Index = 0;       /* read position in src_str */
    u16 u16RunStart = 0;    /* first character of the current run */
    u16 u16Pos = 0;         /* position inside the current run */
    u16 u16offset = 0;      /* number of letters swallowed by ligatures so far */
    u16 u16Kept = 0;        /* number of characters in the result */
    u16 *temp_str = NULL;

    if (target_str == NULL || src_str == NULL)
    {
        return;
    }

    temp_str = malloc((u16Length + 1u) * sizeof *temp_str);
    if (temp_str == NULL)
    {
        /* out of memory: hand back the text unshaped rather than crash */
        if (target_str != src_str)
        {
            for (u16Pos = 0; u16Pos < u16Length; u16Pos++)
            {
                target_str[u16Pos] = src_str[u16Pos];
            }
            target_str[u16Length] = 0;
        }
        return;
    }

    while (u16Index < u16Length)
    {
        u16RunStart = u16Index;

        if (src_str[u16Index] >= 0x060C) /* Arabic */
        {
            /* bounds check first so src_str[u16Length] is never read here */
            while (u16Index < u16Length && src_str[u16Index] >= 0x060C)
            {
                u16Index++;
            }

            if (u16Index - u16RunStart == 1) /* single letter: isolated form */
            {
                temp_str[u16Length - u16RunStart + u16offset - 1] = src_str[u16RunStart];
            }
            else /* word of two or more letters */
            {
                for (u16Pos = u16RunStart; u16Pos < u16Index; u16Pos++)
                {
                    u8 pos;

                    if (u16Pos == u16RunStart)
                    {
                        pos = ARABIC_1ST;
                    }
                    else if (u16Pos == u16Index - 1)
                    {
                        pos = ARABIC_END;
                    }
                    else
                    {
                        pos = ARABIC_MID;
                    }

                    /*
                     * Clear the flag before every call: ArabicEncode() only
                     * ever sets it, and a value left over from an earlier
                     * letter would make us skip a letter that was not
                     * consumed.
                     */
                    b8ArabSpe = FALSE;
                    temp_str[u16Length - u16Pos + u16offset - 1] =
                        ArabicEncode(src_str + u16Pos, pos, &b8ArabSpe, &b8is2set);

                    if (b8ArabSpe == TRUE)
                    {
                        /*
                         * Lam and alef became one character: the output is
                         * one shorter, the alef is skipped, and the next
                         * letter cannot connect to the ligature.
                         */
                        ++u16offset;
                        ++u16Pos;
                        b8is2set = TRUE;
                    }
                }
            }
        }
        else if (src_str[u16Index] == 0x0020)
        {
            temp_str[u16Length - u16Index + u16offset - 1] = src_str[u16Index];
            u16Index++;
        }
        else // not arabic
        {
            while (u16Index < u16Length && src_str[u16Index] > 32 && src_str[u16Index] < 255)
            {
                u16Index++;
            }

            if (u16Index == u16RunStart)
            {
                /*
                 * Control characters and characters in 255..0x060B match
                 * none of the run rules.  Take exactly one so the outer
                 * loop always advances (it used to spin forever here).
                 */
                u16Index++;
            }

            /* copy the run forwards so it stays readable left to right */
            for (u16Pos = 0; u16Pos < u16Index - u16RunStart; u16Pos++)
            {
                temp_str[u16Length - u16Index + u16Pos + u16offset] = src_str[u16RunStart + u16Pos];
            }
        }
    }

    /* the first u16offset slots of temp_str were never written; skip them */
    u16Kept = (u16)(u16Length - u16offset);
    memcpy (target_str, temp_str + u16offset, u16Kept * sizeof *target_str);
    if (u16offset > 0)
    {
        memset (target_str + u16Kept, 0x00, u16offset * sizeof *target_str);
    }
    else
    {
        target_str[u16Length] = 0;
    }

    free (temp_str);
}
아놔 syntax highlighter 왜 이따구야 ㅠ.ㅠ
'모종의 음모 > 아랍어 오토마타' 카테고리의 다른 글
| 아랍어 조합 방법 - unicode (5) | 2008.11.16 |
|---|---|