package main import ( "fmt" "net/http" "regexp" "strconv" "strings" log "github.com/Sirupsen/logrus" ) func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot { var W []Angebot for _, offer := range angebote { if offer.Spirit_type == "Wein" { DebugOffer(offer, "Sanitizer: Skip offer because it's wine") continue } offer.Name = sanitize_name(offer.Name) if offer.Age == 0 { offer.Age = get_age_from_name(offer.Name) } if false == _check_abv_for_spirit_type(offer) { continue } if offer.Volume == 0 { WarnOffer(offer, "Sanitizer: Volume is zero") continue } if offer.Original_price == 0 { WarnOffer(offer, "Sanitizer: Original price is zero") continue } if offer.Discounted_price == 0 { WarnOffer(offer, "Sanitizer: Discounted price is zero") continue } if offer.Base_price == 0 { WarnOffer(offer, "Sanitizer: Base price is zero") continue } if offer.Url == "" { WarnOffer(offer, "Sanitizer: URL is empty") continue } if offer.Image_url == "" { WarnOffer(offer, "Sanitizer: Image-URL is empty") continue } if err := sanitize_image_url(offer.Image_url); err != nil { offer.error_ctx = offer.Image_url offer.error_msg = err.Error() WarnOffer(offer, "Sanitizer: Image-URL is not valid") continue } // Otherwise the database explodes. offer.Website = "" if offer.Age == 0 { DebugOffer(offer, "GREP") } W = append(W, offer) } if len(W) < 1 { log.Warn(fmt.Sprintf(`Sanitizer: No results for shop: '%s' (%d.) try`, shop.Name, try)) } return W } func sanitize_name(name string) string { if strings.Contains(name, "Literflasche") { name = strings.Replace(name, "Literflasche", "", -1) } if strings.Contains(name, "y.o.") { name = strings.Replace(name, "y.o.", "Jahre", -1) } if strings.Contains(name, "years old") { name = strings.Replace(name, "years old", "Jahre", -1) } if strings.Contains(name, "years") { name = strings.Replace(name, "years", "Jahre", -1) } if strings.Contains(name, "Years Old") { name = strings.Replace(name, "Years Old", "Jahre", -1) } if strings.Contains(name, " Anos ") { name = strings.Replace(name, " Anos ", " Jahre ", -1) } if strings.Contains(name, " anos ") { name = strings.Replace(name, " anos ", " Jahre ", -1) } if strings.Contains(name, " Vol. ") { name = strings.Replace(name, " Vol. ", " ", -1) } if strings.Contains(name, " vol. ") { name = strings.Replace(name, " vol. ", " ", -1) } r_J, err := regexp.Compile(`[0-9]+(\s)*J(\s|-)`) if err != nil { Fatal(err, "sanitize_name: J-Regexp (J für Jahr) failed") } age_noisy := r_J.FindString(name) if age_noisy != "" { r_number, err := regexp.Compile(`[0-9]+`) if err != nil { Fatal(err, "sanitize_name: Number-Regexp failed") } age := r_number.FindString(age_noisy) name = strings.Replace(name, age_noisy, age+" Jahre ", 1) } // case insensitive r_liter, err := regexp.Compile(`(?i)[0-9]+([,.][0-9]+)?( )?c?l((iter)|(tr))?(\s|$)`) if err != nil { Fatal(err, "sanitize_name: Liter-Regexp failed") } for { name_liter := r_liter.FindString(name) if name_liter == "" { break } name = strings.Replace(name, name_liter, "", -1) name = strings.TrimSpace(name) } if strings.Contains(name, "Liter") { name = strings.Replace(name, "Liter", "", -1) } name = strings.TrimSpace(name) if strings.Contains(name, "liter") { name = strings.Replace(name, "liter", "", -1) } name = strings.TrimSpace(name) r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`) if err != nil { Fatal(err, "sanitize_name: Procent-Regexp failed") } for { name_procent := r_procent.FindString(name) if name_procent == "" { break } name = strings.Replace(name, name_procent, "", -1) name = strings.TrimSpace(name) } r_release, err := regexp.Compile(`Release$`) if err != nil { Fatal(err, "sanitize_name: Release-Regexp failed") } name_release := r_release.FindString(name) name = strings.Replace(name, name_release, "", 1) name = strings.TrimSpace(name) r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`) if err != nil { Fatal(err, "sanitize_name: '2x'-Regexp failed") } for { name_2x := r_2x.FindString(name) if name_2x == "" { break } name = strings.Replace(name, name_2x, "", -1) name = strings.TrimSpace(name) } // removes redundant white spaces r_ws, err := regexp.Compile(`\s(\s)+`) if err != nil { Fatal(err, "sanitize_name: White Space-Regexp failed") } for { ws := r_ws.FindString(name) if ws == "" { break } name = strings.Replace(name, ws, " ", -1) name = strings.TrimSpace(name) } return name } func sanitize_base_price(price_noisy string) (price int, err error) { if strings.Contains(price_noisy, "Preis pro Liter") { price_noisy = strings.Replace(price_noisy, "Preis pro Liter", "", -1) } if strings.Contains(price_noisy, " pro 1 l") { price_noisy = strings.Replace(price_noisy, " pro 1 l", "", -1) } if strings.Contains(price_noisy, " pro 1 stück") { price_noisy = strings.Replace(price_noisy, " pro 1 stück", "", -1) } if strings.Contains(price_noisy, " pro 1 Stück") { price_noisy = strings.Replace(price_noisy, " pro 1 Stück", "", -1) } if strings.Contains(price_noisy, "Grundpreis:") { price_noisy = strings.Replace(price_noisy, "Grundpreis", "", -1) } if strings.Contains(price_noisy, "/Liter") { price_noisy = strings.Replace(price_noisy, "/Liter", "", -1) } if strings.Contains(price_noisy, "/L") { price_noisy = strings.Replace(price_noisy, "/L", "", -1) } price_noisy = strings.TrimSpace(price_noisy) return convert_price(price_noisy) } func _check_abv_for_spirit_type(offer Angebot) bool { /* if offer.Abv < 40 && (offer.Spirit_type == "Whisky" || offer.Spirit_type == "Cognac") { WarnOffer(offer, "Sanitizer: Abv below 40% for "+offer.Spirit_type) return false } if offer.Abv < 37.5 && (offer.Spirit_type == "Rum" || offer.Spirit_type == "Gin" || offer.Spirit_type == "Wodka" || offer.Spirit_type == "Grappa") { WarnOffer(offer, "Sanitizer: Abv below 37,5% for "+offer.Spirit_type) return false } if offer.Abv < 14 && offer.Spirit_type == "Likör" { WarnOffer(offer, "Sanitizer: Abv below 14% for "+offer.Spirit_type) return false } */ if offer.Abv == 0 { WarnOffer(offer, "Sanitizer: Abv is zero") return false } return true } func get_age_from_name(name string) int { r_years, err := regexp.Compile(`[0-9]+\s*Jahre`) if err != nil { Fatal(err, "get_age_from_name: Years regexp failed") } age_noisy := r_years.FindString(name) if age_noisy == "" { log.Debug("get_age_from_name: No Age found in (" + name + ")") return 0 } r, err := regexp.Compile(`[0-9]+`) if err != nil { Fatal(err, "get_age_from_name: Numbers regexp failed") } age_noisy = r.FindString(age_noisy) age, err := strconv.Atoi(age_noisy) if err != nil { Warn(err, "get_age_from_name: String to int (atoi) failed") return 0 } return age } func sanitize_image_url(url string) error { log.Debugf("sanitize_image_url: Making HEAD request to %s \n", url) resp, err := http.Head(url) if err != nil { return fmt.Errorf("sanitize_image_url: HEAD request failed. Got error: %s \n", err.Error()) } if resp.StatusCode != 200 { return fmt.Errorf("sanitize_image_url: HEAD request failed. StatusCode not 200, got %d \n", resp.StatusCode) } if !strings.HasPrefix(resp.Header.Get("Content-Type"), "image") { return fmt.Errorf("sanitize_image_url: HEAD request failed. Got no image, content-type is %s \n", resp.Header.Get("Content-Type")) } return nil }