diff options
| -rw-r--r-- | crawler/sanitize.go | 26 |
1 files changed, 26 insertions, 0 deletions
// sanitize_image_url checks that url points at a reachable image by issuing
// an HTTP HEAD request (no image data is downloaded).
//
// It returns nil only when all of the following hold:
//   - the HEAD request itself succeeds,
//   - the response status code is 200,
//   - the Content-Type header starts with "image" (compared
//     case-insensitively, because media types are case-insensitive).
//
// Otherwise it returns an error naming the check that failed. A transport
// error is wrapped with %w, so callers can still inspect the underlying
// cause with errors.Is / errors.As.
//
// sanitize_offer calls this on offer.Image_url; on error it stores the URL
// and the error message on the offer, emits a warning via WarnOffer and
// skips the offer.
//
// NOTE(review): http.Head goes through http.DefaultClient, which has no
// timeout, so an unresponsive image host can block the caller indefinitely.
// Consider a shared *http.Client with a Timeout for all crawler requests.
func sanitize_image_url(url string) error {
	resp, err := http.Head(url)
	if err != nil {
		return fmt.Errorf("sanitize_image_url: HEAD request failed. Got error: %w \n", err)
	}
	// net/http requires the caller to close the body, even when it is never
	// read (as is the case for a HEAD response).
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("sanitize_image_url: HEAD request failed. StatusCode not 200, got %d \n", resp.StatusCode)
	}

	// Lower-case only for the comparison; the error message reports the
	// header exactly as the server sent it.
	contentType := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(strings.ToLower(contentType), "image") {
		return fmt.Errorf("sanitize_image_url: HEAD request failed. Got no image, content-type is %s \n", contentType)
	}

	return nil
}
