summaryrefslogtreecommitdiff
path: root/crawler/init.go
diff options
context:
space:
mode:
author: horus 2018-06-15 23:28:18 +0200
committer: horus 2018-06-15 23:28:18 +0200
commit: 8d68ac7c900241eb8499a94c23ab1f60750e7aed (patch)
tree: 3a5d444f866383d5cdefc512242dc2afa236641e /crawler/init.go
parent: 0026ba55f03c5378d5773459fcdd7c6931ff42a5 (diff)
download: alkobote-8d68ac7c900241eb8499a94c23ab1f60750e7aed.tar.gz
Introduces config for user agent, robots.txt and crawler delay. (crawler)
Diffstat (limited to 'crawler/init.go')
-rw-r--r--  crawler/init.go  |  13
1 file changed, 13 insertions(+), 0 deletions(-)
diff --git a/crawler/init.go b/crawler/init.go
index 60f7e47..668df2d 100644
--- a/crawler/init.go
+++ b/crawler/init.go
@@ -23,6 +23,9 @@ func init() {
loglevel_f := flag.StringP("loglevel", "l", "Warn", `sets log level, can be "Warn", "Info" or "Debug"`)
flag.Bool("list-shops", false, `lists all crawlable shops`)
shopids_f := flag.StringP("restrict-shops", "r", "", `comma separated list of shop ids, crawls only these`)
+ user_agent_f := flag.StringP("user-agent", "u", "", "sets user agent")
+ delay_f := flag.Int("delay", 0, "toggles random delay between crawls")
+ ignore_robots_f := flag.Bool("ignore-robots-txt", true, "ignores robots.txt")
flag.Parse()
loglevel := strings.ToLower(*loglevel_f)
@@ -41,6 +44,16 @@ func init() {
_conf.parseConfig(*configFile)
+ if *user_agent_f != "" {
+ _conf.UserAgent = *user_agent_f
+ }
+ if *delay_f != 0 {
+ _conf.Delay = *delay_f
+ }
+ if !*ignore_robots_f {
+ _conf.IgnoreRobotsTXT = *ignore_robots_f
+ }
+
if _conf.Debug && !*silent {
log.SetLevel(log.DebugLevel)
}