diff options
| author | admin | 2025-02-09 11:07:41 +0100 |
|---|---|---|
| committer | admin | 2025-02-09 11:07:41 +0100 |
| commit | 1b28f44a9f1c90e49ddf0149becaa004addc50d3 (patch) | |
| tree | 2cf796046b1b4a57e6f9534c2c54b3ea5a3ed9f6 | |
| parent | c97ca7b18c385e4a308bc508ef739d5c7581a97f (diff) | |
| download | curious-crawler-master.tar.gz | |
| -rw-r--r-- | categories.go | 7 | ||||
| -rw-r--r-- | database.go | 5 | ||||
| -rw-r--r-- | main.go | 1 |
3 files changed, 12 insertions, 1 deletion
```diff
diff --git a/categories.go b/categories.go
index dc71cad..a92b0df 100644
--- a/categories.go
+++ b/categories.go
@@ -147,6 +147,13 @@ func normalizeCategory(s string) string {
 	cat = strings.TrimSuffix(cat, "Taskforce")
 	cat = strings.TrimSuffix(cat, "Task Force")
 	cat = strings.TrimSuffix(cat, "work group")
+	cat = strings.TrimSuffix(cat, "Working Group")
+	cat = strings.TrimSuffix(cat, "Subpage")
+	cat = strings.TrimSuffix(cat, "subpage")
+	cat = strings.TrimSuffix(cat, "Sub-project")
+	cat = strings.TrimSuffix(cat, "sub-project")
+	cat = strings.TrimSuffix(cat, "Project")
+	cat = strings.TrimSuffix(cat, "project")
 
 	if strings.Contains(strings.ToLower(cat), "articles") {
 		return ""
diff --git a/database.go b/database.go
index bb6997d..b029ca7 100644
--- a/database.go
+++ b/database.go
@@ -1,11 +1,13 @@
 package main
 
 import (
-	log "github.com/sirupsen/logrus"
 	"regexp"
 	"strconv"
 
+	log "github.com/sirupsen/logrus"
+
 	"database/sql"
+
 	_ "github.com/go-sql-driver/mysql"
 )
 
@@ -428,6 +430,7 @@ func (app *App) fixAllCategories() {
 	}
 
 	for rows.Next() {
+
 		var category_id int
 		var category_name string
 
diff --git a/main.go b/main.go
--- a/main.go
+++ b/main.go
@@ -43,6 +43,7 @@ func main() {
 	defer app.DB.Close()
 
 	//app.fixAllCategories()
+	//return
 	app.deleteOrphanedArticles()
 	app.topStories()
 
```
