summary | refs | log | tree | commit | diff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 26
1 files changed, 26 insertions, 0 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 2fef9a4..6370588 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -2,6 +2,7 @@ package main
import (
"fmt"
+ "net/http"
"regexp"
"strconv"
"strings"
@@ -49,6 +50,13 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot {
continue
}
+ if err := sanitize_image_url(offer.Image_url); err != nil {
+ offer.error_ctx = offer.Image_url
+ offer.error_msg = err.Error()
+ WarnOffer(offer, "Sanitizer: Image-URL is not valid")
+ continue
+ }
+
//offer.Website = ""
W = append(W, offer)
@@ -254,3 +262,21 @@ func get_age_from_name(name string) int {
}
return age
}
+
+// sanitize_image_url issues an HTTP HEAD request for url and returns nil only
+// when the response has status 200 and a Content-Type starting with "image".
+func sanitize_image_url(url string) error {
+	resp, err := http.Head(url)
+	if err != nil {
+		return fmt.Errorf("sanitize_image_url: HEAD request failed. Got error: %s \n", err.Error())
+	}
+	// net/http expects the caller to close the body of every successful response.
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("sanitize_image_url: HEAD request failed. StatusCode not 200, got %d \n", resp.StatusCode)
+	}
+	if ct := resp.Header.Get("Content-Type"); !strings.HasPrefix(ct, "image") {
+		return fmt.Errorf("sanitize_image_url: HEAD request failed. Got no image, content-type is %s \n", ct)
+	}
+	return nil
+}