From c79e605b60040c4c0e5c792fa447487c2b8ae246 Mon Sep 17 00:00:00 2001 From: horus_arch Date: Mon, 15 Jun 2015 13:38:47 +0200 Subject: Use flex to extract html. Icono-font used for icons. --- scanner.go | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 scanner.go (limited to 'scanner.go') diff --git a/scanner.go b/scanner.go new file mode 100644 index 0000000..10be11b --- /dev/null +++ b/scanner.go @@ -0,0 +1,56 @@ +package main + +/* +#include +#include +#include "urlscanner.h" +*/ +import "C" +import ( + "log" + "strings" +) + +type Link struct { + Url string + Text string + IsDir bool +} + +func getToken(input string) []Link { + var token C.int + var ls []Link + var l Link + + C.scan_string(C.CString(input)) + + for token = C.yylex(); token != C.MYEOF; token = C.yylex() { + + if token == C.TOKEN_URL { + // flex reads the href attr + + l.Url = C.GoString(C.yylval) + + if strings.HasSuffix(l.Url, "/") { + l.IsDir = true + } + + } else if token == C.TOKEN_TEXT { + // flex reads the link description + + l.Text = C.GoString(C.yylval) + ls = append(ls, l) + l = Link{} + } else { + // lexical error + + l = Link{} + log.Printf("Lexical Error on line %d \n", C.yylineno) + continue + } + } + + return ls +} + +// sort -- cgit v1.2.3