forked from GabRayz/OCR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spellcheck.c
129 lines (109 loc) · 2.53 KB
/
spellcheck.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <hunspell/hunspell.h>
#include "linkedlist.h"
// Tell whether the string s contains at least one space character (' ').
// Only the plain space is considered; tabs and newlines do not count.
// Returns 1 when a space is found, 0 otherwise.
int hasWhiteSpace(char *s)
{
    const char *firstSpace = strchr(s, ' ');

    if (firstSpace != NULL)
        return 1;
    return 0;
}
// Return the first entry of a word list that contains no space character.
//
// s points to a `char **` word list; the list itself must be terminated by a
// NULL entry, since no element count is passed in.
// NOTE(review): confirm that whoever builds the list really NULL-terminates
// it before calling this function.
//
// Returns the matching entry (a pointer into the list, not a copy), or NULL
// when s is NULL, the list is NULL, or every entry contains a space.
char *pickWord(char ***s)
{
    if (s == NULL || *s == NULL)
        return NULL;

    // Walk the list that s points to; indexing s itself would step past the
    // single list pointer the caller handed over.
    for (char **entry = *s; *entry != NULL; entry++)
    {
        if (strchr(*entry, ' ') == NULL)
            return *entry;
    }
    return NULL;
}
// Spell-check a single word.
//
// h is the Hunspell handle to query and w the word to check.
// Returns w itself when Hunspell accepts the word or offers no suggestion;
// otherwise returns the first suggestion.
//
// Ownership: a returned suggestion points into the list allocated by
// Hunspell_suggest. The list is deliberately not released here because the
// returned string lives inside it, so that memory stays allocated (as it did
// before this change). Callers cannot tell the two cases apart and must not
// free the result.
char *spellcheck_word(Hunhandle *h, char *w)
{
    // Correctly spelled words are returned untouched.
    if (Hunspell_spell(h, w))
        return w;

    // Hunspell_suggest stores the address of its suggestion list in the slot
    // we pass, so a local char ** is all that is needed to receive it.
    char **suggestions = NULL;
    int count = Hunspell_suggest(h, &suggestions, w);

    if (count > 0 && suggestions != NULL && suggestions[0] != NULL)
        return suggestions[0];

    return w;
}
// Tell whether c belongs to the set of punctuation/separator characters
// listed below. Returns 1 for a member of the set, 0 otherwise.
// The string terminator '\0' is matched as well, because the lookup covers
// the whole array including its final NUL byte.
int isSpecial(char c)
{
    static const char specialChars[24] = " .,;?/!:&\"\'{([-`_)]}=+*";

    // strchr also matches the terminating NUL when asked for '\0'.
    return strchr(specialChars, c) != NULL ? 1 : 0;
}
// Replace, in place, every capital 'I' by a lowercase 'l', except when the
// 'I' is the very first character of s or directly follows a newline.
void I2l(char *s)
{
    // The first character is never rewritten, so an empty string needs no work.
    if (*s == '\0')
        return;

    for (char *cur = s + 1; *cur != '\0'; cur++)
    {
        if (*cur != 'I')
            continue;
        if (cur[-1] == '\n')
            continue;
        *cur = 'l';
    }
}
// Spell-check a space-separated text and build a corrected copy of it.
//
// s is a heap-allocated, NUL-terminated string. It is tokenized in place
// with strtok and freed before returning, so the caller must not use it
// afterwards.
//
// Each token goes through I2l and spellcheck_word. The corrected token is
// appended to the result, followed by the original token's last character
// when that character is "special" and the corrected token does not end in
// '.' or ',', and then by a single space (also after the last token).
// Finally every uppercase ASCII letter that is not at index 0 and is not
// preceded by a space or newline is lowercased.
//
// Returns a newly allocated string owned by the caller, or NULL when memory
// cannot be allocated (s is freed in that case too).
char *spellcheck(char *s)
{
    // Input length plus the trailing space and the terminator; the buffer is
    // grown on demand inside the loop.
    size_t size = strlen(s) + 2;
    char *res = malloc(size);
    if (res == NULL)
    {
        free(s);
        return NULL;
    }

    Hunhandle *h = Hunspell_create(
        "./Dictionnary/en_US.aff",
        "./Dictionnary/en_US.dic");

    size_t i = 0;
    for (char *word = strtok(s, " "); word != NULL; word = strtok(NULL, " "))
    {
        I2l(word);
        char *correctedWord = spellcheck_word(h, word);
        size_t len = strlen(word);
        size_t cLen = strlen(correctedWord);

        // Worst case for this token: corrected text, one re-appended
        // punctuation character, the separating space and the final '\0'.
        size_t needed = i + cLen + 3;
        if (needed > size)
        {
            size_t grown = size * 2 > needed ? size * 2 : needed;
            char *tmp = realloc(res, grown);
            if (tmp == NULL)
            {
                free(res);
                Hunspell_destroy(h);
                free(s);
                return NULL;
            }
            res = tmp;
            size = grown;
        }

        memcpy(res + i, correctedWord, cLen);
        i += cLen;

        // strtok never yields an empty token, so word[len - 1] is valid; the
        // corrected word is only inspected when it is non-empty.
        // NOTE(review): when the token is returned unchanged and ends in a
        // special character other than '.' or ',', that character is written
        // twice. This is kept as-is; confirm whether it is intended.
        if (isSpecial(word[len - 1]) &&
            (cLen == 0 || (correctedWord[cLen - 1] != '.' && correctedWord[cLen - 1] != ',')))
        {
            res[i++] = word[len - 1];
        }
        res[i++] = ' ';
    }
    res[i] = '\0';

    Hunspell_destroy(h);
    free(s);

    // Lowercase capitals that appear in the middle of a word.
    for (i = 0; res[i] != '\0'; i++)
    {
        if (i > 0 && res[i - 1] != ' ' && res[i - 1] != '\n' && res[i] >= 'A' && res[i] <= 'Z')
            res[i] += 'a' - 'A';
    }
    return res;
}