summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author horus_arch 2016-09-09 12:30:02 +0200
committer horus_arch 2016-09-09 12:30:02 +0200
commit 542b26d4dcc90f8186900d51960e3dfefaa0ad01 (patch)
tree fc29f80a69a0a20deec97bd5f00efef599a1d0ad
parent 32685cc367b86c81df3f025f63261aa91ea3cd8c (diff)
download uhttpd-542b26d4dcc90f8186900d51960e3dfefaa0ad01.tar.gz
Removes dependencies on cgo (flex + bison)
-rw-r--r-- scanner.go 56
-rw-r--r-- urlscanner.h 18
-rw-r--r-- urlscanner.l 52
3 files changed, 24 insertions, 102 deletions
diff --git a/scanner.go b/scanner.go
index 9b35de3..b972b70 100644
--- a/scanner.go
+++ b/scanner.go
@@ -1,14 +1,11 @@
package main
-/*
-#include <stdio.h>
-#include "urlscanner.h"
-*/
-import "C"
import (
"html/template"
"log"
"strings"
+
+ "github.com/PuerkitoBio/goquery"
)
type Link struct {
@@ -18,39 +15,34 @@ type Link struct {
}
func getToken(input string) []Link {
- var token C.int
- var ls []Link
- var l Link
-
- C.scan_string(C.CString(input))
-
- for token = C.yylex(); token != C.MYEOF; token = C.yylex() {
- if token == C.TOKEN_URL {
- // flex reads the href attr
+ var links []Link
+ var link Link
- l.Url = template.URL(C.GoString(C.yylval))
+ html_code := strings.NewReader(input)
+ doc, err := goquery.NewDocumentFromReader(html_code)
+ if err != nil {
+ log.Fatal(err)
+ }
- if strings.HasSuffix(C.GoString(C.yylval), "/") {
- l.IsDir = true
- }
+ doc.Find("a").Each(func(i int, s *goquery.Selection) {
- } else if token == C.TOKEN_TEXT {
- // flex reads the link description
+ url, ok := s.Attr("href")
+ if !ok {
+ log.Fatal("Fehler beim URL parsen")
+ }
+ link.Url = template.URL(url)
- l.Text = template.HTML(C.GoString(C.yylval))
- ls = append(ls, l)
- l = Link{}
+ if strings.HasSuffix(url, "/") {
+ link.IsDir = true
} else {
- // lexical error
-
- l = Link{}
- log.Printf("Lexical Error on line %d \n", C.yylineno)
- continue
+ link.IsDir = false
}
- }
- return ls
-}
+ link.Text = template.HTML(s.Text())
-// sort
+ links = append(links, link)
+ })
+
+ return links
+}
diff --git a/urlscanner.h b/urlscanner.h
deleted file mode 100644
index f815c41..0000000
--- a/urlscanner.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef URLSCANNER_H
-#define URLSCANNER_H
-
-#define MYEOF EOF
-#define TOKEN_URL 257
-#define TOKEN_TEXT 258
-
-#ifndef YYSTYPE
-#define YYSTYPE yystype
-typedef char* yystype;
-#endif
-
-extern int yylex();
-extern yystype yylval;
-extern int yylineno;
-extern void scan_string ( const char *str );
-
-#endif
diff --git a/urlscanner.l b/urlscanner.l
deleted file mode 100644
index 79768b2..0000000
--- a/urlscanner.l
+++ /dev/null
@@ -1,52 +0,0 @@
-%option noyywrap
-%option yylineno
-%option nounput
-
-%{
-#include "urlscanner.h"
-#include "string.h"
-yystype yylval;
-#define YY_NO_INPUT
-%}
-
-%x ATAG_HREF
-%x ATAG_BETWEEN
-%x ATAG_DESC
-
-%%
-
-[^<]* { }
-
-"<a"[^>]*"href=\"" { BEGIN(ATAG_HREF); }
-
-<ATAG_HREF>[^\"]+ {
- yylval = strdup(yytext);
- BEGIN(ATAG_BETWEEN);
- return TOKEN_URL;
-}
-
-<ATAG_HREF>\" {
- yylval = "";
- BEGIN(ATAG_BETWEEN);
- return TOKEN_URL;
-}
-
-<ATAG_BETWEEN>[^>]* { }
-
-<ATAG_BETWEEN>">" { BEGIN(ATAG_DESC); }
-
-<ATAG_DESC>[^<]* {
- yylval = strdup(yytext);
- BEGIN(INITIAL);
- return TOKEN_TEXT;
-}
-
-. { /* skip */ }
-
-<<EOF>> { return MYEOF; }
-
-%%
-
-void scan_string(const char* str) {
- yy_switch_to_buffer(yy_scan_string(str));
-}