summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: admin, 2025-02-09 11:07:41 +0100
committer: admin, 2025-02-09 11:07:41 +0100
commit: 1b28f44a9f1c90e49ddf0149becaa004addc50d3 (patch)
tree: 2cf796046b1b4a57e6f9534c2c54b3ea5a3ed9f6
parent: c97ca7b18c385e4a308bc508ef739d5c7581a97f (diff)
download: curious-crawler-master.tar.gz
category fix (HEAD, master)
-rw-r--r-- categories.go 7
-rw-r--r-- database.go 5
-rw-r--r-- main.go 1
3 files changed, 12 insertions, 1 deletion
diff --git a/categories.go b/categories.go
index dc71cad..a92b0df 100644
--- a/categories.go
+++ b/categories.go
@@ -147,6 +147,13 @@ func normalizeCategory(s string) string {
cat = strings.TrimSuffix(cat, "Taskforce")
cat = strings.TrimSuffix(cat, "Task Force")
cat = strings.TrimSuffix(cat, "work group")
+ cat = strings.TrimSuffix(cat, "Working Group")
+ cat = strings.TrimSuffix(cat, "Subpage")
+ cat = strings.TrimSuffix(cat, "subpage")
+ cat = strings.TrimSuffix(cat, "Sub-project")
+ cat = strings.TrimSuffix(cat, "sub-project")
+ cat = strings.TrimSuffix(cat, "Project")
+ cat = strings.TrimSuffix(cat, "project")
if strings.Contains(strings.ToLower(cat), "articles") {
return ""
diff --git a/database.go b/database.go
index bb6997d..b029ca7 100644
--- a/database.go
+++ b/database.go
@@ -1,11 +1,13 @@
package main
import (
- log "github.com/sirupsen/logrus"
"regexp"
"strconv"
+ log "github.com/sirupsen/logrus"
+
"database/sql"
+
_ "github.com/go-sql-driver/mysql"
)
@@ -428,6 +430,7 @@ func (app *App) fixAllCategories() {
}
for rows.Next() {
+
var category_id int
var category_name string
diff --git a/main.go b/main.go
index a31c459..f3d0a6b 100644
--- a/main.go
+++ b/main.go
@@ -43,6 +43,7 @@ func main() {
defer app.DB.Close()
//app.fixAllCategories()
+ //return
app.deleteOrphanedArticles()
app.topStories()