// Source: root/github.go
// Blob: 410d847678bfaacc8583c29ab05d22ebac02d4ef

package main

import (
	"strconv"
	"strings"

	"github.com/gocolly/colly"
)

// ScrapeGithub scrapes GitHub's trending pages for the given platform.
//
// For every combination of app.GetLanguages() and app.GetUpdatePeriods() it
// visits platform.URL + "/trending/" + <language name> + "?since=" + <period
// name> and converts each element matching "ol.repo-list > li" into an Entry
// carrying its title, URL, synopsis, star count, owner, platform, language,
// update period and app.Now as creation time.
//
// Problems are reported through Warn and never abort the scrape: a page that
// cannot be visited is skipped, and a star count that cannot be parsed is
// stored as 0. The returned slice is never nil.
//
// NOTE(review): the language/period bookkeeping assumes customCollector
// returns a synchronous collector, i.e. that the OnHTML callback runs before
// Visit returns — confirm against customCollector.
func (app *App) ScrapeGithub(platform Platform) []Entry {
	baseURL := platform.URL
	languages := app.GetLanguages()
	updatePeriods := app.GetUpdatePeriods()

	// Language and update period of the page currently being visited. They are
	// set by the loops at the bottom and read by the OnHTML callback.
	var currentLanguage Language
	var currentUpdatePeriod UpdatePeriod

	// Kept as an empty, non-nil slice so callers see the same value as before
	// when nothing was scraped.
	entries := []Entry{}

	// NOTE(review): the host list is presumably the set of domains the
	// collector may visit — confirm against customCollector.
	c := app.customCollector([]string{"www.github.com", "github.com"})

	c.OnHTML("ol.repo-list > li", func(e *colly.HTMLElement) {
		entry := Entry{}
		owner := Owner{}

		e.ForEach("div > h3", func(_ int, heading *colly.HTMLElement) {
			entry.URL = baseURL + heading.ChildAttr("a", "href")
			entry.Title = heading.ChildText("a")
			// The span holds "<owner> /"; drop the trailing separator.
			owner.Name = strings.TrimSuffix(heading.ChildText("a > span"), " /")
		})

		e.ForEach("div.py-1", func(_ int, desc *colly.HTMLElement) {
			entry.Synopsis = desc.ChildText("p")
		})

		e.ForEach("div.text-gray", func(i int, meta *colly.HTMLElement) {
			// Only the first matching div is used for the star count.
			if i != 0 {
				return
			}

			// Counts may be rendered with thousands separators ("1,234"),
			// which strconv.Atoi rejects, so strip them before parsing.
			raw := strings.Replace(meta.ChildText("a.muted-text"), ",", "", -1)
			stars, err := strconv.Atoi(strings.TrimSpace(raw))
			if err != nil {
				Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
			}
			// On a parse error stars is 0, matching the previous behavior.
			entry.Stars = stars
		})

		owner.Platform = &platform
		// baseURL carries no trailing slash (see the "/trending/" URL built
		// below), so the separator has to be added explicitly.
		owner.URL = baseURL + "/" + owner.Name

		// Give each entry its own copy. Pointing at currentLanguage and
		// currentUpdatePeriod directly would make every entry alias the same
		// two variables and end up with the values of the last page visited.
		language := currentLanguage
		updatePeriod := currentUpdatePeriod

		entry.Owner = &owner
		entry.Platform = &platform
		entry.Language = &language
		entry.UpdatePeriod = &updatePeriod
		entry.Created_At = app.Now

		entries = append(entries, entry)
	})

	for _, l := range languages {
		currentLanguage = l

		for _, t := range updatePeriods {
			currentUpdatePeriod = t

			currentURL := baseURL + "/trending/" + l.Name + "?since=" + t.Name

			if err := c.Visit(currentURL); err != nil {
				Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+currentURL)
			}
		}
	}

	return entries
}