diff options
Diffstat (limited to 'urlscanner.l')
| -rw-r--r-- | urlscanner.l | 52 |
1 files changed, 52 insertions, 0 deletions
/*
 * urlscanner.l - flex scanner that pulls links out of HTML-like text.
 *
 * For every tag matched by the "<a ... href=\"" pattern below, yylex()
 * returns TOKEN_URL with yylval set to the href value, normally followed by
 * TOKEN_TEXT with yylval set to the text between the tag's closing '>' and
 * the next '<'.  MYEOF is returned at end of input.
 *
 * Every token value stored in yylval is allocated with strdup(), including
 * empty strings, so the consumer can release all of them the same way.
 * strdup() may return NULL on allocation failure; that is passed through
 * unchanged.
 */
%option noyywrap
%option yylineno
%option nounput
%option nodefault

%{
#include "urlscanner.h"
#include <string.h>

/* Value of the most recently returned token.
 * NOTE(review): assumes yystype (declared in urlscanner.h) is a char
 * pointer type - confirm against the header. */
yystype yylval;
%}

/* Inside the quoted href value. */
%x ATAG_HREF
/* After the href value, before the '>' that closes the opening tag. */
%x ATAG_BETWEEN
/* After the opening tag: the link text. */
%x ATAG_DESC

%%

[^<]+ { /* ordinary text outside of any link: ignored */ }

"<a"[^>]*"href=\"" {
    /* NOTE(review): this also matches other tags whose name starts with
     * "<a" and is case-sensitive - tighten if that matters to callers. */
    BEGIN(ATAG_HREF);
}

<ATAG_HREF>[^\"]+ {
    yylval = strdup(yytext);
    BEGIN(ATAG_BETWEEN);
    return TOKEN_URL;
}

<ATAG_HREF>\" {
    /* href="" : report an empty URL.  Allocated like every other token
     * value so that ownership is uniform (previously a string literal). */
    yylval = strdup("");
    BEGIN(ATAG_BETWEEN);
    return TOKEN_URL;
}

<ATAG_BETWEEN>[^>]+ { /* rest of the opening tag: ignored */ }

<ATAG_BETWEEN>">" { BEGIN(ATAG_DESC); }

<ATAG_DESC>[^<]+ {
    yylval = strdup(yytext);
    BEGIN(INITIAL);
    return TOKEN_TEXT;
}

<ATAG_DESC>"<" {
    /* The link text is empty (the tag is immediately followed by another
     * tag).  Without this rule only flex's implicit default rule could
     * consume the '<', and with %option nodefault that rule aborts with
     * "flex scanner jammed".  Give the '<' back so it is rescanned in the
     * INITIAL state, and report an empty text token. */
    yyless(0);
    yylval = strdup("");
    BEGIN(INITIAL);
    return TOKEN_TEXT;
}

. { /* skip: a '<' that does not start a recognised link tag */ }

<<EOF>> { return MYEOF; }

%%

/* Buffer created by the previous scan_string() call, if any. */
static YY_BUFFER_STATE scan_buffer = NULL;

/*
 * Make the scanner read from the NUL-terminated string `str`.
 *
 * yy_scan_string() scans a private copy of the string and already makes the
 * new buffer current, so `str` need not outlive this call and no separate
 * yy_switch_to_buffer() is required.  The buffer from an earlier call is
 * released first so repeated calls do not leak, and the start condition is
 * reset so that a previous input which ended in the middle of a tag cannot
 * affect this one.  A NULL `str` is treated as empty input.
 */
void scan_string(const char* str) {
    if (scan_buffer != NULL) {
        yy_delete_buffer(scan_buffer);
        scan_buffer = NULL;
    }
    scan_buffer = yy_scan_string(str != NULL ? str : "");
    BEGIN(INITIAL);
}
