From 542b26d4dcc90f8186900d51960e3dfefaa0ad01 Mon Sep 17 00:00:00 2001 From: horus_arch Date: Fri, 9 Sep 2016 12:30:02 +0200 Subject: Removes dependencies on cgo (flex + bison) --- scanner.go | 56 ++++++++++++++++++++++++-------------------------------- urlscanner.h | 18 ------------------ urlscanner.l | 52 ---------------------------------------------------- 3 files changed, 24 insertions(+), 102 deletions(-) delete mode 100644 urlscanner.h delete mode 100644 urlscanner.l diff --git a/scanner.go b/scanner.go index 9b35de3..b972b70 100644 --- a/scanner.go +++ b/scanner.go @@ -1,14 +1,11 @@ package main -/* -#include -#include "urlscanner.h" -*/ -import "C" import ( "html/template" "log" "strings" + + "github.com/PuerkitoBio/goquery" ) type Link struct { @@ -18,39 +15,34 @@ type Link struct { } func getToken(input string) []Link { - var token C.int - var ls []Link - var l Link - - C.scan_string(C.CString(input)) - - for token = C.yylex(); token != C.MYEOF; token = C.yylex() { - if token == C.TOKEN_URL { - // flex reads the href attr + var links []Link + var link Link - l.Url = template.URL(C.GoString(C.yylval)) + html_code := strings.NewReader(input) + doc, err := goquery.NewDocumentFromReader(html_code) + if err != nil { + log.Fatal(err) + } - if strings.HasSuffix(C.GoString(C.yylval), "/") { - l.IsDir = true - } + doc.Find("a").Each(func(i int, s *goquery.Selection) { - } else if token == C.TOKEN_TEXT { - // flex reads the link description + url, ok := s.Attr("href") + if !ok { + log.Fatal("Fehler beim URL parsen") + } + link.Url = template.URL(url) - l.Text = template.HTML(C.GoString(C.yylval)) - ls = append(ls, l) - l = Link{} + if strings.HasSuffix(url, "/") { + link.IsDir = true } else { - // lexical error - - l = Link{} - log.Printf("Lexical Error on line %d \n", C.yylineno) - continue + link.IsDir = false } - } - return ls -} + link.Text = template.HTML(s.Text()) -// sort + links = append(links, link) + }) + + return links +} diff --git a/urlscanner.h b/urlscanner.h deleted file mode 100644 index f815c41..0000000 --- a/urlscanner.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef URLSCANNER_H -#define URLSCANNER_H - -#define MYEOF EOF -#define TOKEN_URL 257 -#define TOKEN_TEXT 258 - -#ifndef YYSTYPE -#define YYSTYPE yystype -typedef char* yystype; -#endif - -extern int yylex(); -extern yystype yylval; -extern int yylineno; -extern void scan_string ( const char *str ); - -#endif diff --git a/urlscanner.l b/urlscanner.l deleted file mode 100644 index 79768b2..0000000 --- a/urlscanner.l +++ /dev/null @@ -1,52 +0,0 @@ -%option noyywrap -%option yylineno -%option nounput - -%{ -#include "urlscanner.h" -#include "string.h" -yystype yylval; -#define YY_NO_INPUT -%} - -%x ATAG_HREF -%x ATAG_BETWEEN -%x ATAG_DESC - -%% - -[^<]* { } - -"]*"href=\"" { BEGIN(ATAG_HREF); } - -[^\"]+ { - yylval = strdup(yytext); - BEGIN(ATAG_BETWEEN); - return TOKEN_URL; -} - -\" { - yylval = ""; - BEGIN(ATAG_BETWEEN); - return TOKEN_URL; -} - -[^>]* { } - -">" { BEGIN(ATAG_DESC); } - -[^<]* { - yylval = strdup(yytext); - BEGIN(INITIAL); - return TOKEN_TEXT; -} - -. { /* skip */ } - -<> { return MYEOF; } - -%% - -void scan_string(const char* str) { - yy_switch_to_buffer(yy_scan_string(str)); -} -- cgit v1.2.3