summaryrefslogtreecommitdiff
path: root/urlscanner.l
diff options
context:
space:
mode:
Diffstat (limited to 'urlscanner.l')
-rw-r--r--urlscanner.l52
1 files changed, 52 insertions, 0 deletions
diff --git a/urlscanner.l b/urlscanner.l
new file mode 100644
index 0000000..5f9acb6
--- /dev/null
+++ b/urlscanner.l
@@ -0,0 +1,52 @@
+%option noyywrap
+%option yylineno
+%option nounput
+%option nodefault
+
+%{
+#include "urlscanner.h"
+#include "string.h"
+yystype yylval;
+%}
+
+%x ATAG_HREF
+%x ATAG_BETWEEN
+%x ATAG_DESC
+
+%%
+
+[^<]* { /* outside a link: discard text up to the next < character */ }
+
+"<a"[ \t\r\n]+([^>]*[ \t\r\n])?"href=\"" { BEGIN(ATAG_HREF); /* opening a-tag up to the start of its href value; whitespace is required after the tag name and before href, so area/abbr tags and data-href attributes are not mistaken for links */ }
+
+<ATAG_HREF>[^\"]+ { /* non-empty href value, up to but not including the closing quote */
+ yylval = strdup(yytext); /* heap copy of the matched text; the strdup() result is not checked for NULL */
+ BEGIN(ATAG_BETWEEN); /* next: skip whatever is left of the opening tag */
+ return TOKEN_URL;
+}
+
+<ATAG_HREF>\" { /* closing quote right away: the href value is empty */
+ yylval = strdup(""); /* heap-allocated like the values set by the other token rules, so a consumer can free() every token value uniformly; result not checked for NULL, same as those rules */
+ BEGIN(ATAG_BETWEEN);
+ return TOKEN_URL;
+}
+
+<ATAG_BETWEEN>[^>]* { /* ignore anything left in the opening tag */ }
+
+<ATAG_BETWEEN>">" { BEGIN(ATAG_DESC); /* opening tag closed: switch to reading the text that follows it */ }
+
+<ATAG_DESC>[^<]* { /* text after the opening tag, up to the next < character (the pattern also accepts an empty run) */
+ yylval = strdup(yytext); /* heap copy of the matched text; the strdup() result is not checked for NULL */
+ BEGIN(INITIAL); /* back to looking for the next opening a-tag */
+ return TOKEN_TEXT;
+}
+
+. { /* skip any other single character, e.g. a < that does not begin a matching a-tag */ }
+
+<<EOF>> { return MYEOF; /* end of input; the current start condition is left unchanged */ }
+
+%%
+
+void scan_string(const char* str) { /* Restart the scanner on the NUL-terminated string str: frees the previous input buffer (yy_delete_buffer ignores NULL) instead of leaking it, and returns to INITIAL so a tag left open by the last input cannot affect this one. */
+ yy_delete_buffer(YY_CURRENT_BUFFER); BEGIN(INITIAL); yy_switch_to_buffer(yy_scan_string(str));
+}