|
|
|
|
|
|
|
#include<stdio.h> |
|
#include<locale.h> |
|
#include<wchar.h> |
|
#include<stdlib.h> |
|
#include<string.h> |
|
|
|
struct table{ |
|
wchar_t tamil[10]; |
|
wchar_t english[10]; |
|
}tamil_map[1000]; |
|
int n_characters; |
|
|
|
int is_d_v(wchar_t character, wchar_t *d_v, int num_d_v) |
|
{ |
|
int i; |
|
for(i = 0; i<num_d_v; i++) |
|
{ |
|
if(character == d_v[i]) |
|
{ |
|
return 1; |
|
} |
|
} |
|
return 0; |
|
} |
|
|
|
int is_non_printable(wchar_t character) |
|
{ |
|
if( (character == L'\n') || (character == L'\r') || (character == L'\t') || (character == L' ') ) |
|
{ |
|
return 1; |
|
} |
|
return 0; |
|
} |
|
|
|
wchar_t* transliterate(wchar_t *token) |
|
{ |
|
int i; |
|
for(i=0; i<n_characters; i++) |
|
{ |
|
if(wcsncmp(token,tamil_map[i].tamil,wcslen(token)) == 0) |
|
{ |
|
return tamil_map[i].english; |
|
} |
|
} |
|
fprintf(stderr,"No english character in map file for %ls\n",token); |
|
return NULL; |
|
} |
|
|
|
int main(int argc, char *argv[]) |
|
{ |
|
int i; |
|
int j; |
|
wchar_t d_v[11]; |
|
wchar_t vowels[13]; |
|
wchar_t buffer[100]; |
|
wchar_t character; |
|
wchar_t token[3]; |
|
wchar_t character_previous; |
|
wchar_t *character_transliterated; |
|
wchar_t character_next; |
|
FILE *map_file; |
|
FILE *input_text; |
|
FILE *output_text; |
|
fpos_t pos; |
|
if( argc == 2 ) |
|
{ |
|
map_file = fopen(argv[1],"r"); |
|
input_text = stdin; |
|
output_text = stdout; |
|
} |
|
|
|
else if( argc < 2 ) |
|
{ |
|
fprintf(stderr,"./tamil_english map_file or\n./tamil_english map_file input output\n"); |
|
return 2; |
|
} |
|
|
|
else if ( argc == 4 ) |
|
{ |
|
map_file = fopen(argv[1],"r"); |
|
input_text = fopen(argv[2],"r"); |
|
output_text = fopen(argv[3],"w"); |
|
} |
|
else |
|
{ |
|
fprintf(stderr,"./tamil_english map_file or\n./tamil_english map_file input output\n"); |
|
return 3; |
|
} |
|
|
|
|
|
|
|
if( !setlocale(LC_CTYPE,"") ) |
|
{ |
|
fprintf(stderr, "Locale not specified. Set LANG=ta_IN.utf8\n"); |
|
return 1; |
|
} |
|
|
|
|
|
n_characters = 0; |
|
while( (fgetws((&buffer[0]),100,map_file)) != NULL ) |
|
{ |
|
|
|
if(buffer[0] == L'#') |
|
{ |
|
continue; |
|
} |
|
swscanf(buffer,L"%ls %ls", &tamil_map[n_characters].tamil, tamil_map[n_characters].english); |
|
|
|
n_characters++; |
|
} |
|
|
|
d_v[0] = L'\u0BBE'; |
|
d_v[1] = L'\u0BBF'; |
|
d_v[2] = L'\u0BC0'; |
|
d_v[3] = L'\u0BC1'; |
|
d_v[4] = L'\u0BC2'; |
|
d_v[5] = L'\u0BC6'; |
|
d_v[6] = L'\u0BC7'; |
|
d_v[7] = L'\u0BC8'; |
|
d_v[8] = L'\u0BCA'; |
|
d_v[9] = L'\u0BCB'; |
|
d_v[10] = L'\u0BCC'; |
|
|
|
|
|
vowels[0] = L'\u0B85'; |
|
vowels[1] = L'\u0B86'; |
|
vowels[2] = L'\u0B87'; |
|
vowels[3] = L'\u0B88'; |
|
vowels[4] = L'\u0B89'; |
|
vowels[5] = L'\u0B8A'; |
|
vowels[6] = L'\u0B8E'; |
|
vowels[7] = L'\u0B8F'; |
|
vowels[8] = L'\u0B90'; |
|
vowels[9] = L'\u0B92'; |
|
vowels[10] = L'\u0B93'; |
|
vowels[11] = L'\u0B94'; |
|
vowels[12] = L'\u0B83'; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while( (character = fgetwc(input_text)) != WEOF ) |
|
{ |
|
|
|
|
|
|
|
fgetpos(input_text, &pos); |
|
character_next = fgetwc(input_text); |
|
|
|
if( is_non_printable(character) ) |
|
{ |
|
|
|
fprintf(output_text,"%lc",character); |
|
fsetpos(input_text, &pos); |
|
} |
|
else if(is_d_v(character, vowels, 13)) |
|
{ |
|
|
|
if(character == L'\u0B83' && character_next == L'\u0BAA') |
|
{ |
|
wmemset(token,L'\0',2); |
|
token[0] = character; |
|
|
|
|
|
character_transliterated = transliterate(token); |
|
if(character_transliterated != NULL) |
|
{ |
|
fprintf(output_text,"%ls",character_transliterated); |
|
} |
|
} |
|
else |
|
{ |
|
wmemset(token,L'\0',2); |
|
token[0] = character; |
|
fsetpos(input_text, &pos); |
|
|
|
character_transliterated = transliterate(token); |
|
if(character_transliterated != NULL) |
|
{ |
|
fprintf(output_text,"%ls",character_transliterated); |
|
} |
|
} |
|
} |
|
else |
|
{ |
|
if(character_next == L'\u0BCD') |
|
{ |
|
wmemset(token,L'\0',3); |
|
token[0]=character; |
|
token[1]=character_next; |
|
|
|
character_transliterated = transliterate(token); |
|
if(character_transliterated != NULL) |
|
{ |
|
fprintf(output_text,"%ls",character_transliterated); |
|
} |
|
} |
|
else if(is_d_v(character, d_v, 11)) |
|
{ |
|
wmemset(token,L'\0',2); |
|
token[0]=character; |
|
fsetpos(input_text, &pos); |
|
|
|
character_transliterated = transliterate(token); |
|
if(character_transliterated != NULL) |
|
{ |
|
fprintf(output_text,"%ls",character_transliterated); |
|
} |
|
} |
|
else if(!is_d_v(character_next, d_v, 11)) |
|
{ |
|
wmemset(token,L'\0',2); |
|
token[0] = character; |
|
fsetpos(input_text, &pos); |
|
|
|
character_transliterated = transliterate(token); |
|
if(character_transliterated != NULL) |
|
{ |
|
if(wcsncmp(token,character_transliterated,wcslen(token)) == 0) |
|
{ |
|
fprintf(output_text,"%ls",character_transliterated); |
|
} |
|
else |
|
fprintf(output_text,"%lsa",character_transliterated); |
|
} |
|
|
|
} |
|
else if(is_d_v(character_next, d_v, 11)) |
|
{ |
|
wmemset(token,L'\0',2); |
|
fsetpos(input_text, &pos); |
|
token[0]=character; |
|
|
|
character_transliterated = transliterate(token); |
|
if(character_transliterated != NULL) |
|
{ |
|
fprintf(output_text,"%ls",character_transliterated); |
|
} |
|
} |
|
|
|
} |
|
} |
|
return 0; |
|
} |
|
|