From c79e605b60040c4c0e5c792fa447487c2b8ae246 Mon Sep 17 00:00:00 2001
From: horus_arch
Date: Mon, 15 Jun 2015 13:38:47 +0200
Subject: Use flex to extract html. Icono-font used for icons.

---
 urlscanner.l | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 urlscanner.l

(limited to 'urlscanner.l')

diff --git a/urlscanner.l b/urlscanner.l
new file mode 100644
index 0000000..5f9acb6
--- /dev/null
+++ b/urlscanner.l
@@ -0,0 +1,93 @@
+/*
+ * urlscanner.l - flex scanner that extracts links from HTML text.
+ *
+ * For every anchor tag that carries an href="..." attribute, yylex()
+ * returns TOKEN_URL (the href value) and then TOKEN_TEXT (the text up to
+ * the next '<'), each time with the matched string stored in yylval.
+ * All other input is skipped; MYEOF is returned at end of input.
+ * Token codes and yystype are not defined here; presumably they come
+ * from urlscanner.h.
+ *
+ * Every string handed out through yylval is allocated with strdup().
+ */
+%option noyywrap
+%option yylineno
+%option nounput
+%option nodefault
+
+%{
+#include "urlscanner.h"
+#include <string.h>
+yystype yylval;
+%}
+
+/*
+ * Exclusive start conditions that track the position inside an anchor:
+ *   ATAG_HREF    - inside the quoted href value
+ *   ATAG_BETWEEN - after the href value, before the tag's closing '>'
+ *   ATAG_DESC    - after the '>', reading the link text
+ */
+%x ATAG_HREF
+%x ATAG_BETWEEN
+%x ATAG_DESC
+
+%%
+
+    /* Outside of an anchor: discard everything up to the next '<'. */
+[^<]* { }
+
+    /* Opening of an anchor tag, up to and including href=". */
+"<a"[^>]*"href=\"" { BEGIN(ATAG_HREF); }
+
+    /* Non-empty href value. */
+<ATAG_HREF>[^\"]+ {
+    yylval = strdup(yytext);
+    BEGIN(ATAG_BETWEEN);
+    return TOKEN_URL;
+}
+
+    /* Empty href value (href=""). Duplicated as well, so that every
+     * TOKEN_URL value can be released the same way by the consumer. */
+<ATAG_HREF>\" {
+    yylval = strdup("");
+    BEGIN(ATAG_BETWEEN);
+    return TOKEN_URL;
+}
+
+    /* Remaining attributes of the anchor tag are ignored. */
+<ATAG_BETWEEN>[^>]* { }
+
+    /* End of the opening tag; the link text follows. */
+<ATAG_BETWEEN>">" { BEGIN(ATAG_DESC); }
+
+    /* Link text up to the next '<'. */
+<ATAG_DESC>[^<]* {
+    yylval = strdup(yytext);
+    BEGIN(INITIAL);
+    return TOKEN_TEXT;
+}
+
+    /* A '<' that does not open an anchor with an href. */
+. { /* skip */ }
+
+<<EOF>> { return MYEOF; }
+
+%%
+
+/*
+ * Make the scanner read from the NUL-terminated string str.
+ *
+ * The text is copied into a scanner-owned buffer, so str need not stay
+ * valid afterwards. A NULL str is treated as an empty string.
+ */
+void scan_string(const char* str) {
+    /* yy_scan_string() allocates a new buffer on every call; drop the
+     * one left over from the previous scan so it does not leak. */
+    if (YY_CURRENT_BUFFER) {
+        yy_delete_buffer(YY_CURRENT_BUFFER);
+    }
+    /* The previous input may have ended in the middle of an anchor. */
+    BEGIN(INITIAL);
+    /* yy_scan_string() already makes the new buffer the current one. */
+    yy_scan_string(str != NULL ? str : "");
+}
-- 
cgit v1.2.3