diff options
Diffstat (limited to 'crawler/main.go')
| -rw-r--r-- | crawler/main.go | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/crawler/main.go b/crawler/main.go new file mode 100644 index 0000000..d91aa2e --- /dev/null +++ b/crawler/main.go @@ -0,0 +1,146 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "regexp" + "strconv" + "strings" + + //"github.com/davecgh/go-spew/spew" + "github.com/gocolly/colly" +) + +func main() { + Cards_URL := "http://wiki.dominionstrategy.com/index.php/All_Cards" + + Cards := []Card{} + SplitPile := []Card{} + + c := colly.NewCollector( + colly.AllowedDomains("wiki.dominionstrategy.com"), + ) + + c.OnHTML("table.sortable tr", func(e *colly.HTMLElement) { + + var err error + + if e.Request.URL.String() != Cards_URL { + return + } + + card := Card{} + + e.ForEach("td", func(i int, e *colly.HTMLElement) { + switch i { + case 0: + card.Name = strings.TrimSpace(e.Text) + card.Image = "http://wiki.dominionstrategy.com" + e.ChildAttr("img", "src") + + card.URL = e.ChildAttr("a", "href") + case 1: + card.Set = strings.TrimSpace(e.Text) + + if strings.Contains(card.Set, "1E") { + card.Set = "Base" + card.IsEdition1 = true + } + if strings.Contains(card.Set, "2E") { + card.Set = "Base" + card.IsEdition2 = true + } + case 2: + card.Type = strings.TrimSpace(e.Text) + case 3: + cost := Cost{} + coin_costs := e.ChildAttrs(".coin-icon img", "alt") + for _, v := range coin_costs { + if strings.Contains(v, "$") { + cost.Coin, err = strconv.Atoi(strings.TrimSuffix(strings.TrimSuffix(strings.TrimPrefix(v, "$"), "star"), "plus")) + if err != nil { + log.Fatal(err) + } + } + if strings.Contains(v, "P") { + cost.Potion = 1 + } + } + debt_costs := e.ChildAttr(".debt-icon img", "alt") + if strings.Contains(debt_costs, "D") { + cost.Debt, err = strconv.Atoi(strings.TrimSuffix(debt_costs, "D")) + if err != nil { + log.Fatal(err) + } + } + card.Cost = cost + case 4: + card.text = strings.TrimSpace(e.Text) + case 5: + card.Actions = strings.TrimSpace(e.Text) + case 6: + card.Cards = strings.TrimSpace(e.Text) + case 7: + card.Buy = strings.TrimSpace(e.Text) + case 8: + card.Coin = strings.TrimSpace(strings.TrimPrefix(e.ChildAttr(".coin-img img", "alt"), "$")) + case 9: + card.Trash = strings.TrimSpace(e.Text) + case 10: + card.Junk = strings.TrimSpace(e.Text) + case 11: + card.Gain = strings.TrimSpace(e.Text) + } + + }) + + if strings.Contains(card.text, "Split pile containing 5 ") { + SplitPile = append(SplitPile, card) + } else { + + e.Request.Visit(card.URL) + card.Name_de = e.Request.Ctx.Get("name") + //card.Text_de = e.Request.Ctx.Get("desc") + Cards = append(Cards, card) + } + }) + + c.OnHTML(".wikitable tr", func(e *colly.HTMLElement) { + if e.Request.URL.String() == Cards_URL { + return + } + if e.ChildText("th") != "German" { + return + } + e.ForEach("td", func(i int, e *colly.HTMLElement) { + switch i { + case 0: + name_de := e.Text + + r := regexp.MustCompile(`(\(.+\))`) + name_de = strings.TrimSpace(r.ReplaceAllString(name_de, "")) + e.Request.Ctx.Put("name", name_de) + case 3: + //e.Request.Ctx.Put("desc", e.Text) + } + }) + }) + + c.Visit(Cards_URL) + + Output := []Card{} + for _, v := range Cards { + if v.Name == "" { + continue + } + v.IsKingdom = IsKingdom(v, SplitPile) + card := SetDetails(v) + Output = append(Output, card) + } + + j, err := json.MarshalIndent(Output, "", " ") + if err != nil { + log.Fatal(err) + } + fmt.Println(string(j)) +} |
