summary | refs | log | tree | commit | diff
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/sanitize.go 12
-rw-r--r-- crawler/shop_drankdozijn.go 48
-rw-r--r-- crawler/utility.go 65
3 files changed, 117 insertions, 8 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 48a7b55..2cc839c 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -16,6 +16,11 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot {
for _, offer := range angebote {
+ if offer.Spirit_type == "Wein" {
+ DebugOffer(offer, "Sanitizer: Skip offer because it's wine")
+ continue
+ }
+
offer.Name = sanitize_name(offer.Name)
if offer.Age == 0 {
@@ -57,7 +62,12 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot {
continue
}
- //offer.Website = ""
+ // Otherwise the database explodes.
+ offer.Website = ""
+
+ if offer.Age == 0 {
+ DebugOffer(offer, "GREP")
+ }
W = append(W, offer)
}
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go
index 850f462..b60aaa8 100644
--- a/crawler/shop_drankdozijn.go
+++ b/crawler/shop_drankdozijn.go
@@ -68,6 +68,21 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
W.Name = api_data["saleDescription"].(string)
tmp_desc := api_data["group"].(map[string]interface{})
+
+ tmp_spirit_type := tmp_desc["description"].(string)
+
+ if "Bier" == tmp_spirit_type {
+ DebugOffer(W, "Drankdozijn: skip offer because it's beer")
+ continue
+ }
+
+ /*
+ if "Wein" == tmp_spirit_type {
+ DebugOffer(W, "Drankdozijn: skip offer because it's wine")
+ continue
+ }
+ */
+
W.Spirit_type = detect_spirit_type(tmp_desc["description"].(string))
//v, ok := api_data["price"]
@@ -132,7 +147,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
*/
if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
- DebugOffer(W, "Drankdozijn: Skip Offer")
+ DebugOffer(W, "Drankdozijn: Skip Offer because it contains gratis ware")
return
}
@@ -205,7 +220,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
}
})
- c.OnHTML(".main_description", func(e *colly.HTMLElement) {
+ c.OnHTML(".row .main_description", func(e *colly.HTMLElement) {
//log.Println(".main_price")
prev := ""
count := 0
@@ -236,14 +251,35 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
case "Kategorie", "Categorie":
//e.Request.Ctx.Put("spirit_type", e.Text)
tmp_type := e.Text
- if tmp_type == "Likör" {
+ tmp_type = detect_spirit_type(tmp_type)
+
+ if "Champagner" == tmp_type {
+ W.Spirit_type = tmp_type
+ } else if "Cognac" == W.Spirit_type {
+ if "Calvados" == tmp_type {
+ W.Spirit_type = tmp_type
+ }
+ }
+
+ /*
+ DebugOffer(W, tmp_type)
switch tmp_type {
+ case "Champagner", "Champagne":
+ W.Spirit_type = "Champagner"
case "Tequila":
W.Spirit_type = "Tequila"
}
- }
+ */
+ /*
+ if tmp_type == "Likör" {
+ switch tmp_type {
+ case "Tequila":
+ W.Spirit_type = "Tequila"
+ }
+ }
- if tmp_type == "Wein" {
+ if tmp_type == "Wein" {
+ }
switch tmp_type {
case "Champagner", "Champagne":
W.Spirit_type = "Champagner"
@@ -251,7 +287,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
DebugOffer(W, "Drankdozijn: Skip Offer")
return
}
- }
+ */
}
prev = ""
diff --git a/crawler/utility.go b/crawler/utility.go
index 0650546..8ec0099 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -48,13 +48,27 @@ func detect_spirit_type(name string) string {
if matched {
return "Whisky"
}
- matched, err = regexp.MatchString(`(^|\s)Champagner(\s|$)`, name)
+ matched, err = regexp.MatchString(`(^|\s)Wein(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Wein regex failed")
+ }
+ if matched {
+ return "Wein"
+ }
+ matched, err = regexp.MatchString(`(^|\s)(Champagner)|(Champagne)(\s|$)`, name)
if err != nil {
Fatal(err, "Champagner regex failed")
}
if matched {
return "Champagner"
}
+ matched, err = regexp.MatchString(`(^|\s)(Brandy)|(Weinbrand)(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Brandy regex failed")
+ }
+ if matched {
+ return "Brandy"
+ }
matched, err = regexp.MatchString(`(^|\s)Cognac(\s|$)`, name)
if err != nil {
Fatal(err, "Cognac regex failed")
@@ -62,6 +76,13 @@ func detect_spirit_type(name string) string {
if matched {
return "Cognac"
}
+ matched, err = regexp.MatchString(`(^|\s)Calvados(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Calvados regex failed")
+ }
+ if matched {
+ return "Calvados"
+ }
matched, err = regexp.MatchString(`(^|\s)Grappa(\s|$)`, name)
if err != nil {
Fatal(err, "Grappa regex failed")
@@ -76,6 +97,48 @@ func detect_spirit_type(name string) string {
if matched {
return "Likör"
}
+ matched, err = regexp.MatchString(`(^|\s)(Vermouth)|(Wermut)(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Vermouth|Wermut regex failed")
+ }
+ if matched {
+ return "Wermut"
+ }
+ matched, err = regexp.MatchString(`(^|\s)[G|J]enever(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Genever regex failed")
+ }
+ if matched {
+ return "Genever"
+ }
+ matched, err = regexp.MatchString(`(^|\s)Baijiu(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Baijiu regex failed")
+ }
+ if matched {
+ return "Baijiu"
+ }
+ matched, err = regexp.MatchString(`(^|\s)(Sherry|Oloroso|Fino|Amontillado)(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Sherry regex failed")
+ }
+ if matched {
+ return "Sherry"
+ }
+ matched, err = regexp.MatchString(`((^|\s)Port(wein)?(\s|$))|((^|\s)(Ruby|Tawny)(\s|$))`, name)
+ if err != nil {
+ Fatal(err, "Portwein regex failed")
+ }
+ if matched {
+ return "Portwein"
+ }
+ matched, err = regexp.MatchString(`(^|\s)Sake(\s|$)`, name)
+ if err != nil {
+ Fatal(err, "Sake regex failed")
+ }
+ if matched {
+ return "Sake"
+ }
return "Verschiedenes"
}