comparison hoedown/src/autolink.c @ 2:2dce41ab17e8

hoedown: import 3.0.7
author David Demelier <markand@malikania.fr>
date Wed, 24 Feb 2016 20:54:52 +0100
parents
children
comparison
equal deleted inserted replaced
1:4d89bd8a3f7f 2:2dce41ab17e8
1 #include "autolink.h"
2
3 #include <string.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include <ctype.h>
7
8 #ifndef _MSC_VER
9 #include <strings.h>
10 #else
11 #define strncasecmp _strnicmp
12 #endif
13
/*
 * hoedown_autolink_is_safe: report whether `data` begins with one of the
 * accepted URI prefixes.
 *
 * The first bytes of `data` (at most `size` are considered) are compared,
 * ignoring case, against "http://", "https://", "/", "#", "ftp://" and
 * "mailto:". A prefix only counts when it is immediately followed by an
 * alphanumeric byte, so a bare prefix or a prefix followed by punctuation
 * is rejected.
 *
 * Returns 1 when a prefix matches, 0 otherwise.
 */
int
hoedown_autolink_is_safe(const uint8_t *data, size_t size)
{
	static const char *const valid_uris[] = {
		"http://", "https://", "/", "#", "ftp://", "mailto:"
	};
	static const size_t valid_uris_count =
		sizeof(valid_uris) / sizeof(valid_uris[0]);
	size_t i;

	for (i = 0; i < valid_uris_count; ++i) {
		/* Derived from the table so lengths can never drift from it. */
		size_t len = strlen(valid_uris[i]);

		/* size > len guarantees that data[len] is inside the buffer. */
		if (size > len &&
		    strncasecmp((const char *)data, valid_uris[i], len) == 0 &&
		    isalnum(data[len]))
			return 1;
	}

	return 0;
}
35
/*
 * autolink_delim: trim a candidate link so that trailing delimiters which
 * are unlikely to belong to it are left out.
 *
 * data       - start of the candidate link
 * link_end   - tentative length of the link, in bytes
 * max_rewind - unused here; kept so all call sites share one signature
 * size       - unused here; kept so all call sites share one signature
 *
 * Steps, in order:
 *   1. cut the link at the first '<';
 *   2. repeatedly drop a trailing '?', '!', '.', ',' or ':', and drop a
 *      trailing "&name;" sequence (or just the ';' when no such sequence
 *      precedes it);
 *   3. if the last byte is a closing quote/bracket whose opening and
 *      closing counts inside the link differ, drop that last byte.
 *
 * Returns the adjusted length, or 0 when nothing is left.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
	uint8_t cclose, copen = 0;
	size_t i;

	(void)max_rewind;
	(void)size;

	for (i = 0; i < link_end; ++i)
		if (data[i] == '<') {
			link_end = i;
			break;
		}

	while (link_end > 0) {
		uint8_t last = data[link_end - 1];

		/* strchr() also matches the terminating NUL of its first
		 * argument, so a zero byte must be excluded explicitly. */
		if (last != '\0' && strchr("?!.,:", last) != NULL) {
			link_end--;
		} else if (last == ';') {
			size_t new_end;

			if (link_end < 2) {
				/* A lone ';' cannot close an entity; computing
				 * link_end - 2 here would wrap around. */
				link_end = 0;
				break;
			}

			new_end = link_end - 2;

			while (new_end > 0 && isalpha(data[new_end]))
				new_end--;

			/* Only strip when at least one letter sits between
			 * the '&' and the ';'. */
			if (new_end < link_end - 2 && data[new_end] == '&')
				link_end = new_end;
			else
				link_end--;
		} else {
			break;
		}
	}

	if (link_end == 0)
		return 0;

	cclose = data[link_end - 1];

	switch (cclose) {
	case '"': copen = '"'; break;
	case '\'': copen = '\''; break;
	case ')': copen = '('; break;
	case ']': copen = '['; break;
	case '}': copen = '{'; break;
	default: break;
	}

	if (copen != 0) {
		size_t closing = 0;
		size_t opening = 0;

		/* Try to close the final punctuation sign within the link
		 * itself: if it is matched by an opening sign inside the link
		 * it is kept, otherwise it is treated as surrounding text and
		 * dropped.
		 *
		 * Examples:
		 *
		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *		=> http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * For quotes the opening and closing signs are the same byte,
		 * so every occurrence is counted as an opening one.
		 */
		for (i = 0; i < link_end; ++i) {
			if (data[i] == copen)
				opening++;
			else if (data[i] == cclose)
				closing++;
		}

		if (closing != opening)
			link_end--;
	}

	return link_end;
}
119
/*
 * check_domain: measure the run of domain-like bytes at the start of `data`.
 *
 * data        - start of the candidate domain
 * size        - number of bytes available at `data`
 * allow_short - non-zero to accept a domain without any '.' or ':'
 *
 * The first byte must be alphanumeric. Following bytes may be alphanumeric,
 * '-', '.' or ':'; scanning stops at the first other byte. The final byte
 * of the range (index size - 1) is never examined, so the result is at most
 * size - 1 when size > 1.
 *
 * Returns the length of the accepted run, or 0 when the input is empty, the
 * first byte is not alphanumeric, or (with allow_short == 0) no '.' or ':'
 * was seen.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t i, np = 0;

	/* size == 0 would make `size - 1` wrap around below. */
	if (size == 0 || !isalnum(data[0]))
		return 0;

	for (i = 1; i < size - 1; ++i) {
		uint8_t c = data[i];

		/* Compared explicitly: strchr(".:", c) would also match a
		 * zero byte against the string terminator. */
		if (c == '.' || c == ':')
			np++;
		else if (!isalnum(c) && c != '-')
			break;
	}

	if (allow_short) {
		/* We don't need a valid domain in the strict sense (with at
		 * least one dot); so just make sure it's composed of valid
		 * domain characters and return the length of the valid
		 * sequence. */
		return i;
	} else {
		/* A valid domain needs to have at least a dot.
		 * That's as far as we get. */
		return np ? i : 0;
	}
}
145
146 size_t
147 hoedown_autolink__www(
148 size_t *rewind_p,
149 hoedown_buffer *link,
150 uint8_t *data,
151 size_t max_rewind,
152 size_t size,
153 unsigned int flags)
154 {
155 size_t link_end;
156
157 if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
158 return 0;
159
160 if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
161 return 0;
162
163 link_end = check_domain(data, size, 0);
164
165 if (link_end == 0)
166 return 0;
167
168 while (link_end < size && !isspace(data[link_end]))
169 link_end++;
170
171 link_end = autolink_delim(data, link_end, max_rewind, size);
172
173 if (link_end == 0)
174 return 0;
175
176 hoedown_buffer_put(link, data, link_end);
177 *rewind_p = 0;
178
179 return (int)link_end;
180 }
181
182 size_t
183 hoedown_autolink__email(
184 size_t *rewind_p,
185 hoedown_buffer *link,
186 uint8_t *data,
187 size_t max_rewind,
188 size_t size,
189 unsigned int flags)
190 {
191 size_t link_end, rewind;
192 int nb = 0, np = 0;
193
194 for (rewind = 0; rewind < max_rewind; ++rewind) {
195 uint8_t c = data[-1 - rewind];
196
197 if (isalnum(c))
198 continue;
199
200 if (strchr(".+-_", c) != NULL)
201 continue;
202
203 break;
204 }
205
206 if (rewind == 0)
207 return 0;
208
209 for (link_end = 0; link_end < size; ++link_end) {
210 uint8_t c = data[link_end];
211
212 if (isalnum(c))
213 continue;
214
215 if (c == '@')
216 nb++;
217 else if (c == '.' && link_end < size - 1)
218 np++;
219 else if (c != '-' && c != '_')
220 break;
221 }
222
223 if (link_end < 2 || nb != 1 || np == 0 ||
224 !isalpha(data[link_end - 1]))
225 return 0;
226
227 link_end = autolink_delim(data, link_end, max_rewind, size);
228
229 if (link_end == 0)
230 return 0;
231
232 hoedown_buffer_put(link, data - rewind, link_end + rewind);
233 *rewind_p = rewind;
234
235 return link_end;
236 }
237
238 size_t
239 hoedown_autolink__url(
240 size_t *rewind_p,
241 hoedown_buffer *link,
242 uint8_t *data,
243 size_t max_rewind,
244 size_t size,
245 unsigned int flags)
246 {
247 size_t link_end, rewind = 0, domain_len;
248
249 if (size < 4 || data[1] != '/' || data[2] != '/')
250 return 0;
251
252 while (rewind < max_rewind && isalpha(data[-1 - rewind]))
253 rewind++;
254
255 if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
256 return 0;
257
258 link_end = strlen("://");
259
260 domain_len = check_domain(
261 data + link_end,
262 size - link_end,
263 flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);
264
265 if (domain_len == 0)
266 return 0;
267
268 link_end += domain_len;
269 while (link_end < size && !isspace(data[link_end]))
270 link_end++;
271
272 link_end = autolink_delim(data, link_end, max_rewind, size);
273
274 if (link_end == 0)
275 return 0;
276
277 hoedown_buffer_put(link, data - rewind, link_end + rewind);
278 *rewind_p = rewind;
279
280 return link_end;
281 }