summaryrefslogtreecommitdiff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
author horus 2018-06-15 16:21:12 +0200
committer horus 2018-06-15 16:21:12 +0200
commit f37ebbb81785fb2c02f166b84581b9e92c829b2a (patch)
tree c43a2c5c42b52c769aa6fc16647ce8bfc53362c7 /crawler/sanitize.go
parent c59d15dfc04e0fb75c8132a3ce778dcf801645c1 (diff)
download alkobote-f37ebbb81785fb2c02f166b84581b9e92c829b2a.tar.gz
Tries to validate image url by making head request. (crawler)
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 26
1 files changed, 26 insertions, 0 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 2fef9a4..6370588 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -2,6 +2,7 @@ package main
import (
"fmt"
+ "net/http"
"regexp"
"strconv"
"strings"
@@ -49,6 +50,13 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot {
continue
}
+ if err := sanitize_image_url(offer.Image_url); err != nil {
+ offer.error_ctx = offer.Image_url
+ offer.error_msg = err.Error()
+ WarnOffer(offer, "Sanitizer: Image-URL is not valid")
+ continue
+ }
+
//offer.Website = ""
W = append(W, offer)
@@ -254,3 +262,21 @@ func get_age_from_name(name string) int {
}
return age
}
+
+// sanitize_image_url checks that url points at a reachable image by issuing
+// an HTTP HEAD request against it.
+//
+// It returns nil when the request succeeds, the response status is 200 and
+// the Content-Type header starts with "image". Otherwise it returns an error
+// describing which of those checks failed.
+//
+// NOTE(review): http.Head goes through http.DefaultClient, which has no
+// timeout configured; an unresponsive host could stall the caller. Consider
+// a shared client with a Timeout.
+func sanitize_image_url(url string) error {
+
+	resp, err := http.Head(url)
+	if err != nil {
+		return fmt.Errorf("sanitize_image_url: HEAD request failed. Got error: %s \n", err.Error())
+	}
+	// A HEAD response has no payload, but the body must still be closed so
+	// the transport can release (or reuse) the underlying connection.
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("sanitize_image_url: HEAD request failed. StatusCode not 200, got %d \n", resp.StatusCode)
+	}
+
+	contentType := resp.Header.Get("Content-Type")
+	if !strings.HasPrefix(contentType, "image") {
+		return fmt.Errorf("sanitize_image_url: HEAD request failed. Got no image, content-type is %s \n", contentType)
+	}
+
+	return nil
+}