summaryrefslogtreecommitdiff
path: root/crawler/init.go
blob: 668df2dea5d6d575af5514d496c450b0d5aad6bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package main

import (
	"errors"
	"strings"

	log "github.com/Sirupsen/logrus"
	flag "github.com/spf13/pflag"
)

// global config, gets overwritten by main
var _conf Config

func init() {
	// overwrites unhelpful error message
	flag.ErrHelp = errors.New("")

	// we need to parse the config because of log level setting
	configFile := flag.StringP("config", "c", "", "path to config file")
	debug := flag.BoolP("debug", "d", false, "debug outputs")
	verbose := flag.BoolP("verbose", "v", false, "same as --debug")
	silent := flag.BoolP("silent", "s", false, "suppress outputs except warnings")
	loglevel_f := flag.StringP("loglevel", "l", "Warn", `sets log level, can be "Warn", "Info" or "Debug"`)
	flag.Bool("list-shops", false, `lists all crawlable shops`)
	shopids_f := flag.StringP("restrict-shops", "r", "", `comma separated list of shop ids, crawls only these`)
	user_agent_f := flag.StringP("user-agent", "u", "", "sets user agent")
	delay_f := flag.Int("delay", 0, "toggles random delay between crawls")
	ignore_robots_f := flag.Bool("ignore-robots-txt", true, "ignores robots.txt")

	flag.Parse()
	loglevel := strings.ToLower(*loglevel_f)

	if *debug || *verbose || loglevel == "debug" {
		log.SetLevel(log.DebugLevel)
	} else if loglevel == "info" {
		log.SetLevel(log.InfoLevel)
	} else {
		log.SetLevel(log.WarnLevel)
	}

	if *silent {
		log.SetLevel(log.WarnLevel)
	}

	_conf.parseConfig(*configFile)

	if *user_agent_f != "" {
		_conf.UserAgent = *user_agent_f
	}
	if *delay_f != 0 {
		_conf.Delay = *delay_f
	}
	if !*ignore_robots_f {
		_conf.IgnoreRobotsTXT = *ignore_robots_f
	}

	if _conf.Debug && !*silent {
		log.SetLevel(log.DebugLevel)
	}

	if "" != *shopids_f {
		_conf.ShopIDs = strings.Split(*shopids_f, ",")
	}
}