diff options
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/shop_drankdozijn.go | 156 | ||||
| -rw-r--r-- | crawler/utility.go | 59 |
2 files changed, 89 insertions, 126 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go index b60aaa8..b2ceb82 100644 --- a/crawler/shop_drankdozijn.go +++ b/crawler/shop_drankdozijn.go @@ -5,7 +5,6 @@ import ( "io/ioutil" "net/http" "strings" - //"strconv" log "github.com/Sirupsen/logrus" "github.com/gocolly/colly" @@ -76,23 +75,12 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { continue } - /* - if "Wein" == tmp_spirit_type { - DebugOffer(W, "Drankdozijn: skip offer because it's wine") - continue - } - */ - W.Spirit_type = detect_spirit_type(tmp_desc["description"].(string)) - //v, ok := api_data["price"] - //log.Println(v, ok) if v, _ := api_data["price"]; v == nil { //DebugOffer(W, "Drankdozijn: Skip Offer") DebugOffer(W, "Drankdozijn: price is nil -> skip offer") continue - } else { - //log.Println("price is NOT nil -> NOT SKIPPING!") } W.Original_price, err = convert_price(api_data["price"].(string)) if err != nil { @@ -124,27 +112,11 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { W.Url = "https://drankdozijn.de/artikel/" + (tmp_map["alias"]).(string) tmp_image_map := tmp_map["images"].([]interface{}) W.Image_url = IMAGE_URL + tmp_image_map[0].(string) - //log.Println(W.Image_url) - - /* - for _, v2 := range tmp_image_map { - tmp_image := v2.(map[string]interface{}) - W.Image_url = IMAGE_URL + tmp_image["0"].(string) - log.Println(W.Image_url) - } - */ } c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"}) c.OnHTML(".product_top", func(e *colly.HTMLElement) { - // log.Println(".product_top") - - /* - e.ForEach(".product_image", func(i int, e *colly.HTMLElement) { - W.Image_url = e.ChildAttr("img", "src") - }) - */ if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") { DebugOffer(W, "Drankdozijn: Skip Offer because it contains gratis ware") @@ -153,64 +125,9 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { e.Request.Visit(W.Url) - /* - var ctx string - - W.Volume, ctx = get_volume(e) - if W.Volume == 0 { - W.error_msg = e.Request.Ctx.Get("volume") - W.error_ctx = ctx - PrintlnOffer(W, "Drankdozijn: Volume is zero") - return - } - - W.Abv, ctx = get_abv(e) - if W.Abv == 0 { - W.error_msg = "Drankdozijn: Abv is zero" - W.error_ctx = ctx - PrintlnOffer(W, "Drankdozijn: abv is zero") - return - } - - base_price_noisy := e.Request.Ctx.Get("base_price") - W.Base_price, err = convert_price(base_price_noisy) - if err != nil { - W.error_msg = err.Error() - W.error_ctx = e.ChildText(".price_l") - PrintlnOffer(W, "Drankdozijn: Converting base price failed") - return - } - - if W.Spirit_type == "Cognac" { - W.Spirit_type = e.Request.Ctx.Get("spirit_type") - } - - if W.Spirit_type == "Likör" { - tmp_type := e.Request.Ctx.Get("spirit_type") - switch tmp_type { - case "Tequila": - W.Spirit_type = "Tequila" - } - } - - if W.Spirit_type == "Wein" { - tmp_type := e.Request.Ctx.Get("spirit_type") - switch tmp_type { - case "Champagner": - case "Champagne": - W.Spirit_type = "Champagner" - default: - DebugOffer(W, "Drankdozijn: Skip Offer") - return - } - } - */ }) c.OnHTML(".main_price", func(e *colly.HTMLElement) { - //log.Println(".main_price") - //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L")) - //e.Request.Ctx.Put("base_price", e.ChildText(".price_l")) W.Base_price, err = convert_price(e.ChildText(".price_l")) if err != nil { W.error_msg = err.Error() @@ -221,17 +138,14 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { }) c.OnHTML(".row .main_description", func(e *colly.HTMLElement) { - //log.Println(".main_price") prev := "" count := 0 e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) { if count%2 == 0 { prev = e.Text } else { - //log.Println(strings.TrimSpace(prev) + ": " + e.Text) switch strings.TrimSpace(prev) { case "Inhalt", "Inhoud": - //e.Request.Ctx.Put("volume", e.Text) W.Volume, err = extract_volume(e.Text) if W.Volume == 0 { W.error_msg = e.Text @@ -240,7 +154,6 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { return } case "Alkoholgehalt", "Alcoholpercentage": - //e.Request.Ctx.Put("abv", e.Text) W.Abv, err = extract_abv(e.Text) if W.Abv == 0 { W.error_msg = "Drankdozijn: Abv is zero" @@ -249,45 +162,53 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { return } case "Kategorie", "Categorie": - //e.Request.Ctx.Put("spirit_type", e.Text) tmp_type := e.Text tmp_type = detect_spirit_type(tmp_type) if "Champagner" == tmp_type { - W.Spirit_type = tmp_type - } else if "Cognac" == W.Spirit_type { - if "Calvados" == tmp_type { + if tmp_type != W.Spirit_type { + DebugOffer(W, "Spirit Type Changed: "+W.Spirit_type+" -> "+tmp_type) W.Spirit_type = tmp_type } + W.Spirit_type = tmp_type } - /* - DebugOffer(W, tmp_type) - switch tmp_type { - case "Champagner", "Champagne": - W.Spirit_type = "Champagner" - case "Tequila": - W.Spirit_type = "Tequila" + switch W.Spirit_type { + case "Cognac": + if tmp_type != W.Spirit_type { + DebugOffer(W, "Spirit Type Changed: "+W.Spirit_type+" -> "+tmp_type) + W.Spirit_type = tmp_type } - */ - /* - if tmp_type == "Likör" { - switch tmp_type { - case "Tequila": - W.Spirit_type = "Tequila" - } + W.Spirit_type = tmp_type + case "Brandy": + if tmp_type != W.Spirit_type { + DebugOffer(W, "Spirit Type Changed: "+W.Spirit_type+" -> "+tmp_type) + W.Spirit_type = tmp_type } - - if tmp_type == "Wein" { + W.Spirit_type = tmp_type + case "Sherry": + if tmp_type != W.Spirit_type { + DebugOffer(W, "Spirit Type Changed: "+W.Spirit_type+" -> "+tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type + case "Likör": + if "Tequila" == tmp_type { + if tmp_type != W.Spirit_type { + DebugOffer(W, "Spirit Type Changed: "+W.Spirit_type+" -> "+tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type } - switch tmp_type { - case "Champagner", "Champagne": - W.Spirit_type = "Champagner" - default: - DebugOffer(W, "Drankdozijn: Skip Offer") - return + if "Baijiu" == tmp_type { + if tmp_type != W.Spirit_type { + DebugOffer(W, "Spirit Type Changed: "+W.Spirit_type+" -> "+tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type } - */ + } + } prev = "" @@ -297,14 +218,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { }) c.OnHTML("body", func(e *colly.HTMLElement) { - /* - log.Println("body") - e.Request.Ctx.Put("website", string(e.Response.Body)) - - W.Website = e.Request.Ctx.Get("website") - */ W.Website = string(e.Response.Body) - //W.Website = string(e.Response.Body) }) err = c.Visit(W.Url) diff --git a/crawler/utility.go b/crawler/utility.go index 8ec0099..e59b40b 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -62,12 +62,12 @@ func detect_spirit_type(name string) string { if matched { return "Champagner" } - matched, err = regexp.MatchString(`(^|\s)(Brandy)|(Weinbrand)(\s|$)`, name) + matched, err = regexp.MatchString(`(^|\s)Armagnac(\s|$)`, name) if err != nil { - Fatal(err, "Brandy regex failed") + Fatal(err, "Armagnac regex failed") } if matched { - return "Brandy" + return "Armagnac" } matched, err = regexp.MatchString(`(^|\s)Cognac(\s|$)`, name) if err != nil { @@ -76,6 +76,13 @@ func detect_spirit_type(name string) string { if matched { return "Cognac" } + matched, err = regexp.MatchString(`(^|\s)(Brandy)|(Weinbrand)(\s|$)`, name) + if err != nil { + Fatal(err, "Brandy regex failed") + } + if matched { + return "Brandy" + } matched, err = regexp.MatchString(`(^|\s)Calvados(\s|$)`, name) if err != nil { Fatal(err, "Calvados regex failed") @@ -90,7 +97,7 @@ func detect_spirit_type(name string) string { if matched { return "Grappa" } - matched, err = regexp.MatchString(`(^|\s)Likör(\s|$)`, name) + matched, err = regexp.MatchString(`(^|\s)(\wlikör)|(Likör)(\s|$)`, name) if err != nil { Fatal(err, "Likör regex failed") } @@ -104,7 +111,7 @@ func detect_spirit_type(name string) string { if matched { return "Wermut" } - matched, err = regexp.MatchString(`(^|\s)[G|J]enever(\s|$)`, name) + matched, err = regexp.MatchString(`(^|\s)([G|J]enever)|(Korenwijn)(\s|$)`, name) if err != nil { Fatal(err, "Genever regex failed") } @@ -132,6 +139,13 @@ func detect_spirit_type(name string) string { if matched { return "Portwein" } + matched, err = regexp.MatchString(`(^|\s)Obstler|Obstbrand|Edelbrand(\s|$)`, name) + if err != nil { + Fatal(err, "Obstbrand regex failed") + } + if matched { + return "Obstbrand" + } matched, err = regexp.MatchString(`(^|\s)Sake(\s|$)`, name) if err != nil { Fatal(err, "Sake regex failed") @@ -139,6 +153,41 @@ func detect_spirit_type(name string) string { if matched { return "Sake" } + matched, err = regexp.MatchString(`(^|\s)Shochu(\s|$)`, name) + if err != nil { + Fatal(err, "Shochu regex failed") + } + if matched { + return "Shochu" + } + matched, err = regexp.MatchString(`(^|\s)Pisco(\s|$)`, name) + if err != nil { + Fatal(err, "Pisco regex failed") + } + if matched { + return "Pisco" + } + matched, err = regexp.MatchString(`(^|\s)Absinth(\s|$)`, name) + if err != nil { + Fatal(err, "Absinth regex failed") + } + if matched { + return "Absinth" + } + matched, err = regexp.MatchString(`(^|\s)Tequila(\s|$)`, name) + if err != nil { + Fatal(err, "Tequila regex failed") + } + if matched { + return "Tequila" + } + matched, err = regexp.MatchString(`(^|\s)Mezcal(\s|$)`, name) + if err != nil { + Fatal(err, "Mezcal regex failed") + } + if matched { + return "Mezcal" + } return "Verschiedenes" } |
