summaryrefslogtreecommitdiff
path: root/github.go
blob: 50564d62b17818c1cf98ce55a5f6a97b5cc10b02 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
package main

import (
	"strconv"
	"strings"

	log "github.com/Sirupsen/logrus"
	"github.com/gocolly/colly"
)

// ScrapeGithub crawls the trending listing of the given platform once for
// every combination of language (app.GetLanguages) and update period
// (app.GetUpdatePeriods) and returns the repositories found there.
//
// For each combination the page <platform.URL>/trending/<language>?since=<period>
// is visited. Every "ol.repo-list > li" element on a visited page produces one
// Entry carrying the repository link and title, its synopsis, its star count,
// its owner, and copies of the platform, language and update period that were
// being crawled when the element was seen.
//
// Failures are reported through Warn and never abort the crawl: a star count
// that cannot be parsed leaves Stars at 0, and a page that cannot be visited
// is skipped. The returned slice is never nil.
func (app *App) ScrapeGithub(platform Platform) []Entry {
	// Normalise the base URL once so that joining path segments below yields
	// exactly one slash, whether or not platform.URL carries a trailing one.
	baseURL := strings.TrimSuffix(platform.URL, "/")

	languages := app.GetLanguages()
	updatePeriods := app.GetUpdatePeriods()

	// Crawl position shared with the OnHTML callback; updated by the loops at
	// the bottom before each visit.
	// NOTE(review): this relies on c.Visit running the callbacks before it
	// returns — presumably customCollector yields a synchronous collector;
	// confirm before enabling async mode.
	var (
		currentLanguage     Language
		currentUpdatePeriod UpdatePeriod
	)

	entries := []Entry{}

	c := app.customCollector([]string{"www.github.com", "github.com"})

	c.OnHTML("ol.repo-list > li", func(item *colly.HTMLElement) {
		entry := Entry{}
		owner := Owner{}

		// The heading link text has the form "<owner> / <repository>", with
		// the owner part additionally wrapped in a <span>.
		item.ForEach("div > h3", func(_ int, heading *colly.HTMLElement) {
			entry.URL = heading.ChildAttr("a", "href")
			owner.Name = strings.TrimSpace(strings.TrimSuffix(heading.ChildText("a > span"), " /"))
			entry.Title = strings.TrimSpace(strings.TrimPrefix(heading.ChildText("a"), owner.Name+" /"))
		})

		item.ForEach("div.py-1", func(_ int, desc *colly.HTMLElement) {
			entry.Synopsis = desc.ChildText("p")
		})

		item.ForEach("div.text-gray > a.muted-link", func(_ int, link *colly.HTMLElement) {
			if !strings.Contains(link.Attr("href"), "stargazers") {
				return
			}
			// Star counts are rendered with thousands separators ("1,234").
			stars := strings.TrimSpace(strings.Replace(link.Text, ",", "", -1))
			count, err := strconv.Atoi(stars)
			if err != nil {
				Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
			}
			// Atoi yields 0 on failure, so an unparsable count is stored as 0.
			entry.Stars = count
		})

		// Snapshot the current crawl position so each entry owns its own
		// values instead of aliasing the variables mutated by the loops.
		lang := Language{ID: currentLanguage.ID, Name: currentLanguage.Name}
		plat := Platform{ID: platform.ID, Name: platform.Name, URL: platform.URL}
		period := UpdatePeriod{ID: currentUpdatePeriod.ID, Name: currentUpdatePeriod.Name}

		owner.Platform = &platform
		owner.URL = baseURL + "/" + owner.Name

		entry.Owner = &owner
		entry.Platform = &plat
		entry.Language = &lang
		entry.UpdatePeriod = &period
		entry.Created_At = app.Now

		log.Debugf("%+v\n", owner)
		log.Debugf("%+v\n", entry)

		entries = append(entries, entry)
	})

	for _, l := range languages {
		currentLanguage = l

		for _, t := range updatePeriods {
			currentUpdatePeriod = t

			// NOTE(review): names are inserted verbatim; a name containing
			// reserved URL characters (e.g. "#") would need escaping —
			// confirm they are stored as URL-safe slugs.
			trendingURL := baseURL + "/trending/" + l.Name + "?since=" + t.Name
			log.Println("Crawling " + trendingURL)

			if err := c.Visit(trendingURL); err != nil {
				Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+trendingURL)
			}
		}
	}

	return entries
}