'AUTOMATA'에 해당되는 글 1건

  1. 2009.03.05 유니코드 아랍어 오토마타 8
// Short integer aliases used by the Arabic shaping routines below.
// NOTE(review): assumes unsigned char is 8 bits and unsigned short is 16 bits
// (one UTF-16 code unit) on the target platform - confirm.
001typedef unsigned char       u8;
002typedef unsigned short      u16;
003 
// Position of a letter inside an Arabic word, passed to ArabicEncode() as `pos`.
// ArabicEncode() also adds ARABIC_END / ARABIC_1ST / ARABIC_MID directly to a
// base presentation-form code point, so the numeric values must stay 1 / 2 / 3.
// ARABIC_SIN is presumably "single" (a stand-alone letter): ArabicEncode()
// applies no offset for it.
004#define ARABIC_SIN 0
005#define ARABIC_END 1
006#define ARABIC_1ST 2
007#define ARABIC_MID 3
008 
009u16 ArabicEncode(u16 *arab, u8 pos, BOOL* stepit, BOOL* is2set)
010{
011    u16 ucode = 0;
012    BOOL nowChar = FALSE;
013    switch(*arab)
014    {
015        // 2개일 경우
016        case 0x0622: ucode = 0xFE81; nowChar = TRUE; break;
017        case 0x0623: ucode = 0xFE83; nowChar = TRUE; break;
018        case 0x0624: ucode = 0xFE85; nowChar = TRUE; break;
019        case 0x0625: ucode = 0xFE87; nowChar = TRUE; break;
020        case 0x0627: ucode = 0xFE8D; nowChar = TRUE; break;
021        case 0x0629: ucode = 0xFE93; nowChar = TRUE; break;
022        case 0x062F: ucode = 0xFEA9; nowChar = TRUE; break;
023        case 0x0630: ucode = 0xFEAB; nowChar = TRUE; break;
024        case 0x0631: ucode = 0xFEAD; nowChar = TRUE; break;
025        case 0x0632: ucode = 0xFEAF; nowChar = TRUE; break;
026        case 0x0648: ucode = 0xFEED; nowChar = TRUE; break;
027        case 0x0649: ucode = 0xFEEF; nowChar = TRUE; break;
028 
029        // 4개 일경우
030        case 0x0626: ucode = 0xFE89; nowChar = FALSE; break;
031        case 0x0628: ucode = 0xFE8F; nowChar = FALSE; break;
032        case 0x062A: ucode = 0xFE95; nowChar = FALSE; break;
033        case 0x062B: ucode = 0xFE99; nowChar = FALSE; break;
034        case 0x062C: ucode = 0xFE9D; nowChar = FALSE; break;
035        case 0x062D: ucode = 0xFEA1; nowChar = FALSE; break;
036        case 0x062E: ucode = 0xFEA5; nowChar = FALSE; break;
037        case 0x0633: ucode = 0xFEB1; nowChar = FALSE; break;
038        case 0x0634: ucode = 0xFEB5; nowChar = FALSE; break;
039        case 0x0635: ucode = 0xFEB9; nowChar = FALSE; break;
040        case 0x0636: ucode = 0xFEBD; nowChar = FALSE; break;
041        case 0x0637: ucode = 0xFEC1; nowChar = FALSE; break;
042        case 0x0638: ucode = 0xFEC5; nowChar = FALSE; break;
043        case 0x0639: ucode = 0xFEC9; nowChar = FALSE; break;
044        case 0x063A: ucode = 0xFECD; nowChar = FALSE; break;
045        case 0x0641: ucode = 0xFED1; nowChar = FALSE; break;
046        case 0x0642: ucode = 0xFED5; nowChar = FALSE; break;
047        case 0x0643: ucode = 0xFED9; nowChar = FALSE; break;
048        case 0x0644: ucode = 0xFEDD; nowChar = FALSE; break;
049        case 0x0645: ucode = 0xFEE1; nowChar = FALSE; break;
050        case 0x0646: ucode = 0xFEE5; nowChar = FALSE; break;
051        case 0x0647: ucode = 0xFEE9; nowChar = FALSE; break;
052        case 0x064A: ucode = 0xFEF1; nowChar = FALSE; break;
053    }
054 
055    if((*arab == 0x0644) && (pos != ARABIC_END) && (pos != ARABIC_SIN))
056    {
057        switch(*(arab+1))
058        {
059            case 0x0622: ucode = 0xFEF5; *stepit = TRUE; break;
060            case 0x0623: ucode = 0xFEF7; *stepit = TRUE; break;
061            case 0x0625: ucode = 0xFEF9; *stepit = TRUE; break;
062            case 0x0627: ucode = 0xFEFB; *stepit = TRUE; break;
063            default: break;
064        }
065 
066        if(*stepit == TRUE)
067        {
068            switch(pos)
069            {
070                case ARABIC_1ST:
071                    if(*(arab+2) == 0x0020) break;
072                    else { ucode += 0x001; break; }
073                case ARABIC_MID:
074                    if(*is2set == TRUE) break;
075                    else { ucode += 0x001; break; }
076            }
077 
078            *is2set = nowChar;
079            return ucode;
080        }
081    }
082 
083    switch(pos)
084    {
085        //case ARABIC_SIN: return ucode;
086        case ARABIC_1ST:
087                if(nowChar == TRUE) ucode = *arab;
088                else ucode += ARABIC_1ST;
089                break;
090        case ARABIC_MID:
091                if(nowChar == TRUE) // 2개 짜리
092                {
093                    if(*is2set == TRUE) ucode = *arab;
094                    else ucode += ARABIC_END;
095                }
096                else // 4개 짜리
097                {
098                    if(*is2set == TRUE) ucode += ARABIC_1ST;
099                    else ucode += ARABIC_MID;
100                }
101                break;
102        case ARABIC_END:
103                if(nowChar == TRUE) // 2개 짜리
104                {
105                    if(*is2set == TRUE) ucode = *arab;
106                    else ucode += ARABIC_END;
107                }
108                else // 4개 짜리
109                {
110                    if(*is2set == TRUE) ucode = *arab;
111                    else ucode += ARABIC_END;
112                }
113                break;
114    }
115 
116    *is2set = nowChar;
117    return ucode;
118}
119 
120BOOL isarabicstr(u16 *pu16String, u16 u16StrLength)
121{
122    u16 index = 0;
123    BOOL arabic_exist = FALSE;
124 
125    for(index = 0;index < u16StrLength;index++)
126    {
127        if(pu16String[index] >= 0x060C && pu16String[index] <= 0x064B) return TRUE;
128    }
129 
130    return arabic_exist;
131}
132 
133void Arabic_automata(u16 *target_str, u16 *src_str,u16 u16Length)
134{
135    BOOL b8ArabSpe = FALSE;
136    BOOL b8is2set = FALSE;
137    u16 u16Index = 0;
138    u16 u16Index2 = 0;
139    u16 u16Index3 = 0;
140    u16 u16offset = 0;
141    u16 *temp_str = NULL;
142     
143    temp_str = malloc((u16Length + 1) * 2);
144 
145    for (u16Index = 0; u16Index < u16Length;)
146    {
147        if (src_str[u16Index] >= 0x060C && u16Index < u16Length) // arabic
148        {
149            u16Index2 = u16Index; // 아랍어 시작점
150            while(src_str[u16Index] >= 0x060C && u16Index < u16Length ) u16Index++;
151            if(u16Index - u16Index2 == 1) // 단독형
152            {
153                temp_str[u16Length - u16Index2 + u16offset - 1] = src_str[u16Index2];
154            }
155            else // 2개 이상 단어
156            {
157                // 첫자
158                temp_str[u16Length - u16Index2 + u16offset - 1] = ArabicEncode(src_str + u16Index2, ARABIC_1ST ,&b8ArabSpe, &b8is2set);
159                if(b8ArabSpe == TRUE) {++u16offset; ++u16Index2;}
160                 
161                //중간글자
162                for(u16Index3 = u16Index2 + 1; u16Index3 < u16Index - 1; u16Index3++)
163                {
164                    if(b8ArabSpe == TRUE)
165                    {
166                        b8ArabSpe = FALSE;
167                        temp_str[u16Length - u16Index3 + u16offset - 1] = ArabicEncode(src_str + u16Index3, ARABIC_1ST ,&b8ArabSpe, &b8is2set);
168                    }
169                    else
170                        temp_str[u16Length - u16Index3 + u16offset - 1] = ArabicEncode(src_str + u16Index3, ARABIC_MID ,&b8ArabSpe, &b8is2set);
171                    if(b8ArabSpe == TRUE) {++u16offset; ++u16Index3;}
172                }
173                //마지막자
174                if(u16Index - u16Index2 != 1)
175                {
176                    if(b8ArabSpe == TRUE)
177                    {
178                        b8ArabSpe = FALSE;
179                        temp_str[u16Length - u16Index + u16offset] = ArabicEncode(src_str + u16Index - 1, ARABIC_1ST ,&b8ArabSpe, &b8is2set);
180                    }
181                    else
182                        temp_str[u16Length - u16Index + u16offset] = ArabicEncode(src_str + u16Index - 1, ARABIC_END ,&b8ArabSpe, &b8is2set);
183                    if(b8ArabSpe == TRUE) b8ArabSpe = FALSE;
184                }
185            }
186        }
187        else if (src_str[u16Index] == 0x0020)
188        {
189            u16Index2 = u16Index++;
190            temp_str[u16Length - u16Index + u16offset] = src_str[u16Index - 1];
191        }
192        else // not arabic
193        {
194            u16Index2 = u16Index; // 영어 시작점
195            while(src_str[u16Index] > 32 && src_str[u16Index] < 255  && u16Index < u16Length && src_str[u16Index] != 0x0020) u16Index++;
196            for(u16Index3 = 0; u16Index3 < u16Index - u16Index2; u16Index3++)
197            {
198                temp_str[u16Length - u16Index + u16Index3 + u16offset] = src_str[u16Index2 + u16Index3];
199            }
200        }
201    }
202 
203    if(u16offset > 0)
204    {
205        memcpy(target_str,temp_str + u16offset,(u16Length - u16offset) * 2);
206        memset(target_str + (u16Length - u16offset),0x00,u16offset * 2);
207    }
208    else
209    {
210        memcpy(target_str,temp_str,(u16Length) * 2);
211        memset(target_str + u16Length,0x00,2);
212    }
213 
214    free(temp_str);
215}



아놔 syntax highlighter 왜 이따구야 ㅠ.ㅠ

'모종의 음모 > 아랍어 오토마타' 카테고리의 다른 글

아랍어 조합 방법 - unicode  (5) 2008.11.16
Posted by 구차니