summaryrefslogtreecommitdiff
path: root/urlscanner.l
diff options
context:
space:
mode:
author: horus_arch, 2015-06-15 13:38:47 +0200
committer: horus_arch, 2015-06-15 13:38:47 +0200
commit: c79e605b60040c4c0e5c792fa447487c2b8ae246 (patch)
tree: d20c97b59c6252e555618a128ea63f6fa9d016d9 /urlscanner.l
parent: 3a09b2509102d58fe4e52a9a8fa699f6b42dc327 (diff)
download: uhttpd-c79e605b60040c4c0e5c792fa447487c2b8ae246.tar.gz
Use flex to extract html. Icono-font used for icons.
Diffstat (limited to 'urlscanner.l')
-rw-r--r--urlscanner.l52
1 files changed, 52 insertions, 0 deletions
diff --git a/urlscanner.l b/urlscanner.l
new file mode 100644
index 0000000..5f9acb6
--- /dev/null
+++ b/urlscanner.l
@@ -0,0 +1,52 @@
+%option noyywrap
+%option yylineno
+%option nounput
+%option nodefault
+
+%{
+#include "urlscanner.h"
+#include "string.h"
+yystype yylval;
+%}
+
+%x ATAG_HREF
+%x ATAG_BETWEEN
+%x ATAG_DESC
+
+%%
+
+[^<]* { /* outside of any tag: discard everything up to the next '<' */ }
+
+"<a"[^>]*"href=\"" { /* "<a", any non-'>' characters, then href=" -- the URL follows. NOTE(review): anything may follow "<a", so e.g. <area ... href=" matches as well. */ BEGIN(ATAG_HREF); }
+
+<ATAG_HREF>[^\"]+ {
+ /* Everything up to the closing quote is the URL.  The token value is a
+  * strdup() copy of yytext; nothing in this file frees it -- presumably the
+  * caller does (TODO confirm).  The strdup() result is not checked here, so
+  * yylval can be NULL if the allocation fails. */
+ yylval = strdup(yytext);
+ /* Continue with the remainder of the opening tag. */
+ BEGIN(ATAG_BETWEEN);
+ return TOKEN_URL;
+}
+
+<ATAG_HREF>\" {
+ /* href="" -- the closing quote follows immediately, so report an empty
+  * URL.  The value is allocated with strdup(), exactly like the non-empty
+  * URL rule, so that every TOKEN_URL value is heap memory and can be
+  * released the same way; handing out a string literal here would make
+  * free() on the token value undefined behaviour. */
+ yylval = strdup("");
+ BEGIN(ATAG_BETWEEN);
+ return TOKEN_URL;
+}
+
+<ATAG_BETWEEN>[^>]* { /* ignore the rest of the opening tag (closing quote, further attributes) */ }
+
+<ATAG_BETWEEN>">" { /* end of the opening tag: what follows is the link text */ BEGIN(ATAG_DESC); }
+
+<ATAG_DESC>[^<]+ {
+ /* Link text: everything up to the next tag, returned as a strdup() copy
+  * of yytext.  Scanning then resumes in INITIAL. */
+ yylval = strdup(yytext);
+ BEGIN(INITIAL);
+ return TOKEN_TEXT;
+}
+
+<ATAG_DESC>"<" {
+ /* A tag starts right after the opening <a ...>, e.g. <a href="x"></a> or
+  * <a href="x"><img ...></a>.  ATAG_DESC is an exclusive start condition,
+  * so without this rule the '<' could only be matched by the default rule,
+  * which %option nodefault turns into a fatal "scanner jammed" error.
+  * Report an empty link text so each TOKEN_URL is still followed by a
+  * TOKEN_TEXT, and push the '<' back so INITIAL rescans it. */
+ yyless(0);
+ yylval = strdup("");
+ BEGIN(INITIAL);
+ return TOKEN_TEXT;
+}
+
+. { /* skip one character; in INITIAL this is in practice a '<' that does not start an anchor match, since [^<]* comes first and takes all other input */ }
+
+<<EOF>> { /* end of input; having no start-condition prefix, this rule also covers the exclusive ATAG_* conditions. The current start condition is left unchanged here. */ return MYEOF; }
+
+%%
+
+/*
+ * Point the scanner at the NUL-terminated string `str`; subsequent yylex()
+ * calls return its tokens.
+ *
+ * yy_scan_string() copies `str` into a newly allocated buffer and makes that
+ * buffer current by itself, so no separate yy_switch_to_buffer() call is
+ * needed.  The buffer left over from a previous call is deleted first:
+ * nothing else in this file releases it, so it would otherwise be leaked on
+ * every call.  The start condition is reset too, because a scan that stopped
+ * inside an anchor (truncated input, or a caller that stopped reading
+ * tokens early) would otherwise make the next string start in one of the
+ * exclusive ATAG_* conditions.
+ */
+void scan_string(const char* str) {
+ /* No-op when there is no current buffer yet (first call). */
+ yy_delete_buffer(YY_CURRENT_BUFFER);
+ BEGIN(INITIAL);
+ yy_scan_string(str);
+}