diff options
| field | value | date |
|---|---|---|
| author | horus | 2019-05-07 16:04:41 +0200 |
| committer | horus | 2019-05-07 16:04:41 +0200 |
| commit | a347c584e0e650219d71941990302c4be558da22 (patch) | |
| tree | 0842697207c5cd744d3c0964f870f791590d96a7 | |
| parent | 21fa504804b35e689adcb89c483ae2a8deb82d83 (diff) | |
| download | alkobote-a347c584e0e650219d71941990302c4be558da22.tar.gz | |
Better spirit type detection. (crawler)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | crawler/sanitize.go | 12 |
| -rw-r--r-- | crawler/shop_drankdozijn.go | 48 |
| -rw-r--r-- | crawler/utility.go | 65 |
3 files changed, 117 insertions, 8 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go index 48a7b55..2cc839c 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -16,6 +16,11 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot { for _, offer := range angebote { + if offer.Spirit_type == "Wein" { + DebugOffer(offer, "Sanitizer: Skip offer because it's wine") + continue + } + offer.Name = sanitize_name(offer.Name) if offer.Age == 0 { @@ -57,7 +62,12 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot { continue } - //offer.Website = "" + // Otherwise the database explodes. + offer.Website = "" + + if offer.Age == 0 { + DebugOffer(offer, "GREP") + } W = append(W, offer) } diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go index 850f462..b60aaa8 100644 --- a/crawler/shop_drankdozijn.go +++ b/crawler/shop_drankdozijn.go @@ -68,6 +68,21 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { W.Name = api_data["saleDescription"].(string) tmp_desc := api_data["group"].(map[string]interface{}) + + tmp_spirit_type := tmp_desc["description"].(string) + + if "Bier" == tmp_spirit_type { + DebugOffer(W, "Drankdozijn: skip offer because it's beer") + continue + } + + /* + if "Wein" == tmp_spirit_type { + DebugOffer(W, "Drankdozijn: skip offer because it's wine") + continue + } + */ + W.Spirit_type = detect_spirit_type(tmp_desc["description"].(string)) //v, ok := api_data["price"] @@ -132,7 +147,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { */ if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") { - DebugOffer(W, "Drankdozijn: Skip Offer") + DebugOffer(W, "Drankdozijn: Skip Offer because it contains gratis ware") return } @@ -205,7 +220,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { } }) - c.OnHTML(".main_description", func(e *colly.HTMLElement) { + c.OnHTML(".row .main_description", func(e *colly.HTMLElement) { //log.Println(".main_price") prev := "" count := 0 @@ -236,14 +251,35 
@@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { case "Kategorie", "Categorie": //e.Request.Ctx.Put("spirit_type", e.Text) tmp_type := e.Text - if tmp_type == "Likör" { + tmp_type = detect_spirit_type(tmp_type) + + if "Champagner" == tmp_type { + W.Spirit_type = tmp_type + } else if "Cognac" == W.Spirit_type { + if "Calvados" == tmp_type { + W.Spirit_type = tmp_type + } + } + + /* + DebugOffer(W, tmp_type) switch tmp_type { + case "Champagner", "Champagne": + W.Spirit_type = "Champagner" case "Tequila": W.Spirit_type = "Tequila" } - } + */ + /* + if tmp_type == "Likör" { + switch tmp_type { + case "Tequila": + W.Spirit_type = "Tequila" + } + } - if tmp_type == "Wein" { + if tmp_type == "Wein" { + } switch tmp_type { case "Champagner", "Champagne": W.Spirit_type = "Champagner" @@ -251,7 +287,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { DebugOffer(W, "Drankdozijn: Skip Offer") return } - } + */ } prev = "" diff --git a/crawler/utility.go b/crawler/utility.go index 0650546..8ec0099 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -48,13 +48,27 @@ func detect_spirit_type(name string) string { if matched { return "Whisky" } - matched, err = regexp.MatchString(`(^|\s)Champagner(\s|$)`, name) + matched, err = regexp.MatchString(`(^|\s)Wein(\s|$)`, name) + if err != nil { + Fatal(err, "Wein regex failed") + } + if matched { + return "Wein" + } + matched, err = regexp.MatchString(`(^|\s)(Champagner)|(Champagne)(\s|$)`, name) if err != nil { Fatal(err, "Champagner regex failed") } if matched { return "Champagner" } + matched, err = regexp.MatchString(`(^|\s)(Brandy)|(Weinbrand)(\s|$)`, name) + if err != nil { + Fatal(err, "Brandy regex failed") + } + if matched { + return "Brandy" + } matched, err = regexp.MatchString(`(^|\s)Cognac(\s|$)`, name) if err != nil { Fatal(err, "Cognac regex failed") @@ -62,6 +76,13 @@ func detect_spirit_type(name string) string { if matched { return "Cognac" } + matched, err = 
regexp.MatchString(`(^|\s)Calvados(\s|$)`, name) + if err != nil { + Fatal(err, "Calvados regex failed") + } + if matched { + return "Calvados" + } matched, err = regexp.MatchString(`(^|\s)Grappa(\s|$)`, name) if err != nil { Fatal(err, "Grappa regex failed") @@ -76,6 +97,48 @@ func detect_spirit_type(name string) string { if matched { return "Likör" } + matched, err = regexp.MatchString(`(^|\s)(Vermouth)|(Wermut)(\s|$)`, name) + if err != nil { + Fatal(err, "Vermouth|Wermut regex failed") + } + if matched { + return "Wermut" + } + matched, err = regexp.MatchString(`(^|\s)[G|J]enever(\s|$)`, name) + if err != nil { + Fatal(err, "Genever regex failed") + } + if matched { + return "Genever" + } + matched, err = regexp.MatchString(`(^|\s)Baijiu(\s|$)`, name) + if err != nil { + Fatal(err, "Baijiu regex failed") + } + if matched { + return "Baijiu" + } + matched, err = regexp.MatchString(`(^|\s)(Sherry|Oloroso|Fino|Amontillado)(\s|$)`, name) + if err != nil { + Fatal(err, "Sherry regex failed") + } + if matched { + return "Sherry" + } + matched, err = regexp.MatchString(`((^|\s)Port(wein)?(\s|$))|((^|\s)(Ruby|Tawny)(\s|$))`, name) + if err != nil { + Fatal(err, "Portwein regex failed") + } + if matched { + return "Portwein" + } + matched, err = regexp.MatchString(`(^|\s)Sake(\s|$)`, name) + if err != nil { + Fatal(err, "Sake regex failed") + } + if matched { + return "Sake" + } return "Verschiedenes" } |
