diff options
Diffstat (limited to 'scanner.go')
| -rw-r--r-- | scanner.go | 56 |
1 files changed, 24 insertions, 32 deletions
@@ -1,14 +1,11 @@ package main -/* -#include <stdio.h> -#include "urlscanner.h" -*/ -import "C" import ( "html/template" "log" "strings" + + "github.com/PuerkitoBio/goquery" ) type Link struct { @@ -18,39 +15,34 @@ type Link struct { } func getToken(input string) []Link { - var token C.int - var ls []Link - var l Link - - C.scan_string(C.CString(input)) - - for token = C.yylex(); token != C.MYEOF; token = C.yylex() { - if token == C.TOKEN_URL { - // flex reads the href attr + var links []Link + var link Link - l.Url = template.URL(C.GoString(C.yylval)) + html_code := strings.NewReader(input) + doc, err := goquery.NewDocumentFromReader(html_code) + if err != nil { + log.Fatal(err) + } - if strings.HasSuffix(C.GoString(C.yylval), "/") { - l.IsDir = true - } + doc.Find("a").Each(func(i int, s *goquery.Selection) { - } else if token == C.TOKEN_TEXT { - // flex reads the link description + url, ok := s.Attr("href") + if !ok { + log.Fatal("Fehler beim URL parsen") + } + link.Url = template.URL(url) - l.Text = template.HTML(C.GoString(C.yylval)) - ls = append(ls, l) - l = Link{} + if strings.HasSuffix(url, "/") { + link.IsDir = true } else { - // lexical error - - l = Link{} - log.Printf("Lexical Error on line %d \n", C.yylineno) - continue + link.IsDir = false } - } - return ls -} + link.Text = template.HTML(s.Text()) -// sort + links = append(links, link) + }) + + return links +} |
