From fa4dc4b0f8bef95f378b8fc2d72b6f8ebdf2a6ca Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sat, 20 Nov 2021 11:30:55 +0800 Subject: [PATCH 01/11] added air for debugging containers --- .gitignore | 1 + backend/.air.master.conf | 47 ++++++++++ backend/{.air.conf => .air.worker.conf} | 4 +- backend/go.mod | 2 +- backend/go.mod.local | 21 +++++ workspace/docker-compose.yml | 111 ++++++++---------------- workspace/dockerfiles/golang/Dockerfile | 30 +++++-- 7 files changed, 130 insertions(+), 86 deletions(-) create mode 100644 backend/.air.master.conf rename backend/{.air.conf => .air.worker.conf} (95%) create mode 100644 backend/go.mod.local diff --git a/.gitignore b/.gitignore index 47cefbd86..5e091d2d2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ tmp/ _book/ *.lock vendor/ +.crawlab diff --git a/backend/.air.master.conf b/backend/.air.master.conf new file mode 100644 index 000000000..adf015979 --- /dev/null +++ b/backend/.air.master.conf @@ -0,0 +1,47 @@ +# Config file for [Air](https://github.com/cosmtrek/air) in TOML format + +# Working directory +# . or absolute path, please note that the directories following must be under root. +root = "." +tmp_dir = "/tmp" + +[build] +# Just plain old shell command. You could use `make` as well. +cmd = "go build -o ../tmp/main ./ " +# Binary file yields from `cmd`. +bin = "../tmp/main" +# Customize binary. +full_bin = "../tmp/main master" +# Watch these filename extensions. +include_ext = ["go", "tpl", "tmpl", "html"] +# Ignore these filename extensions or directories. +exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] +# Watch these directories if you specified. +include_dir = ["../libs"] +# Exclude files. +exclude_file = [ +# This log file places in your tmp_dir. +log = "air.log" +# It's not necessary to trigger build each time file changes if it's too frequent. +delay = 1000 # ms +# Stop running old binary when build errors occur. +stop_on_error = true +# Send Interrupt signal before killing process (windows does not support this feature) +send_interrupt = false +# Delay after sending Interrupt signal +kill_delay = 500 # ms + +[log] +# Show log time +time = false + +[color] +# Customize each part's color. If no color found, use the raw app log. +main = "magenta" +watcher = "cyan" +build = "yellow" +runner = "green" + +[misc] +# Delete tmp directory on exit +clean_on_exit = true \ No newline at end of file diff --git a/backend/.air.conf b/backend/.air.worker.conf similarity index 95% rename from backend/.air.conf rename to backend/.air.worker.conf index a6d352044..954ad0952 100644 --- a/backend/.air.conf +++ b/backend/.air.worker.conf @@ -11,13 +11,13 @@ cmd = "go build -o ../tmp/main ./ " # Binary file yields from `cmd`. bin = "../tmp/main" # Customize binary. -full_bin = "../tmp/main start" +full_bin = "../tmp/main worker" # Watch these filename extensions. include_ext = ["go", "tpl", "tmpl", "html"] # Ignore these filename extensions or directories. exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] # Watch these directories if you specified. -include_dir = [] +include_dir = ["../libs"] # Exclude files. exclude_file = [] # This log file places in your tmp_dir. diff --git a/backend/go.mod b/backend/go.mod index 61a6072c6..39860256a 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -1,6 +1,6 @@ module crawlab -go 1.15 +go 1.16 require ( github.com/apex/log v1.9.0 diff --git a/backend/go.mod.local b/backend/go.mod.local new file mode 100644 index 000000000..1bfca0b32 --- /dev/null +++ b/backend/go.mod.local @@ -0,0 +1,21 @@ +module crawlab + +go 1.15 + +replace ( + github.com/crawlab-team/crawlab-core => /libs/crawlab-team/crawlab-core + github.com/crawlab-team/crawlab-vcs => /libs/crawlab-team/crawlab-vcs + github.com/crawlab-team/crawlab-fs => /libs/crawlab-team/crawlab-fs + github.com/crawlab-team/crawlab-db => /libs/crawlab-team/crawlab-db +) + +require ( + github.com/apex/log v1.9.0 + github.com/crawlab-team/crawlab-core v0.6.0-beta.20211113.2050 + github.com/crawlab-team/go-trace v0.1.0 + github.com/gin-gonic/gin v1.6.3 + github.com/spf13/cobra v1.1.3 + github.com/spf13/viper v1.7.1 + go.mongodb.org/mongo-driver v1.6.0 // indirect + go.uber.org/dig v1.10.0 +) diff --git a/workspace/docker-compose.yml b/workspace/docker-compose.yml index 63bfd7848..131cd2abe 100644 --- a/workspace/docker-compose.yml +++ b/workspace/docker-compose.yml @@ -1,95 +1,56 @@ -version: "3.3" +version: '3.3' services: master: build: - context: dockerfiles/golang - command: "air -c .air.conf" + context: ./dockerfiles/golang + command: "air -c .air.master.conf" volumes: + - "./.crawlab/master:/root/.crawlab" - ../backend:/backend - - /backend/tmp - depends_on: - - mongo - - redis - ports: - - 8000:8000 + - ../backend/go.mod.local:/backend/go.mod + - ../..:/libs/crawlab-team environment: - CRAWLAB_SPIDER_PATH: "/spiders" - CRAWLAB_SETTING_ENABLEDEMOSPIDERS: "Y" + CRAWLAB_NODE_MASTER: "Y" CRAWLAB_MONGO_HOST: "mongo" - CRAWLAB_REDIS_ADDRESS: "redis" - CRAWLAB_SERVER_MASTER: "Y" - CRAWLAB_SERVER_REGISTER_TYPE: "customName" - CRAWLAB_SERVER_REGISTER_CUSTOMNODENAME: "master_1" - CRAWLAB_SERVER_PORT: 8000 - worker-1: - build: - context: dockerfiles/golang - command: "air -c .air.conf" + ports: + - "9080:8080" depends_on: - mongo - - redis - ports: - - 8001:8000 - volumes: - - ../backend:/backend - - /backend/tmp - environment: - CRAWLAB_SPIDER_PATH: "/spiders" - CRAWLAB_MONGO_HOST: "mongo" - CRAWLAB_REDIS_ADDRESS: "redis" - CRAWLAB_SERVER_MASTER: "N" - CRAWLAB_SERVER_PORT: 8001 - CRAWLAB_SERVER_REGISTER_TYPE: "customName" - CRAWLAB_SERVER_REGISTER_CUSTOMNODENAME: "worker_1" - worker-2: + worker01: build: - context: dockerfiles/golang - command: "air -c .air.conf" - depends_on: - - mongo - - redis - ports: - - 8002:8000 + context: ./dockerfiles/golang + command: "air -c .air.worker.conf" + environment: + CRAWLAB_NODE_MASTER: "N" + CRAWLAB_GRPC_ADDRESS: "master" + CRAWLAB_FS_FILER_URL: "http://master:8080/api/filer" volumes: + - "./.crawlab/worker01:/root/.crawlab" - ../backend:/backend - environment: - CRAWLAB_SPIDER_PATH: "/spiders" - CRAWLAB_MONGO_HOST: "mongo" - CRAWLAB_REDIS_ADDRESS: "redis" - CRAWLAB_SERVER_MASTER: "N" - CRAWLAB_SERVER_PORT: 8002 - CRAWLAB_SERVER_REGISTER_TYPE: "customName" - CRAWLAB_SERVER_REGISTER_CUSTOMNODENAME: "worker_2" - ui: + - ../backend/go.mod.local:/backend/go.mod + - ../..:/libs/crawlab-team + depends_on: + - master + + worker02: build: - context: ./dockerfiles/node - container_name: crawlab_frontend - ports: - - 8080:8080 + context: ./dockerfiles/golang + command: "air -c .air.worker.conf" + environment: + CRAWLAB_NODE_MASTER: "N" + CRAWLAB_GRPC_ADDRESS: "master" + CRAWLAB_FS_FILER_URL: "http://master:8080/api/filer" volumes: - - ../frontend:/frontend - - /frontend/node_modules - command: - - /bin/sh - - -c - - | - yarn install - yarn run serve + - "./.crawlab/worker02:/root/.crawlab" + - ../backend:/backend + - ../backend/go.mod.local:/backend/go.mod + - ../..:/libs/crawlab-team depends_on: - master + mongo: - image: mongo:latest - restart: always - # volumes: - # - "/opt/crawlab/mongo/data/db:/data/db" # make data persistent 持久化 - ports: - - "27017:27017" # expose port to host machine 暴露接口到宿主机 - redis: - image: redis:latest + image: mongo:4 restart: always - # command: redis-server --requirepass "password" # set redis password 设置 Redis 密码 - # volumes: - # - "/opt/crawlab/redis/data:/data" # make data persistent 持久化 ports: - - "6379:6379" # expose port to host machine 暴露接口到宿主机 + - "28017:27017" diff --git a/workspace/dockerfiles/golang/Dockerfile b/workspace/dockerfiles/golang/Dockerfile index b1ad99fd5..59e3cbda0 100644 --- a/workspace/dockerfiles/golang/Dockerfile +++ b/workspace/dockerfiles/golang/Dockerfile @@ -1,26 +1,40 @@ -FROM golang:buster +FROM golang:1.16 + RUN go env -w GOPROXY=https://goproxy.io,https://goproxy.cn && \ go env -w GO111MODULE="on" + WORKDIR /tools RUN go get github.com/cosmtrek/air + WORKDIR /backend RUN rm -rf /tools + # set as non-interactive ENV DEBIAN_FRONTEND noninteractive -# set CRAWLAB_IS_DOCKER -ENV CRAWLAB_IS_DOCKER Y # install packages RUN chmod 777 /tmp \ - && sed -i 's#http://deb.debian.org#https://mirrors.tuna.tsinghua.edu.cn#g' /etc/apt/sources.list \ && apt-get update \ - && apt-get install -y curl net-tools iputils-ping ntp ntpdate python3 python3-pip dumb-init \ + && apt-get install -y curl git net-tools iputils-ping ntp ntpdate nginx wget dumb-init cloc + +# install python +RUN apt-get install -y python3 python3-pip \ && ln -s /usr/bin/pip3 /usr/local/bin/pip \ && ln -s /usr/bin/python3 /usr/local/bin/python -RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple +# install golang +RUN curl -OL https://storage.googleapis.com/golang/go1.16.7.linux-amd64.tar.gz \ + && tar -C /usr/local -xvf go1.16.7.linux-amd64.tar.gz \ + && ln -s /usr/local/go/bin/go /usr/local/bin/go + +# install seaweedfs +RUN wget https://github.com/chrislusf/seaweedfs/releases/download/2.76/linux_amd64.tar.gz \ + && tar -zxf linux_amd64.tar.gz \ + && cp weed /usr/local/bin + # install backend -RUN pip install scrapy pymongo bs4 requests crawlab-sdk scrapy-splash -RUN mkdir /spiders && chmod -R 0755 /spiders +RUN pip install scrapy pymongo bs4 requests -i https://mirrors.aliyun.com/pypi/simple +RUN pip install crawlab-sdk==0.6.b20211024-1207 + VOLUME /backend EXPOSE 8080 From 182639ec8d531633524cef97e58d5a449acc8a36 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sat, 20 Nov 2021 11:33:12 +0800 Subject: [PATCH 02/11] updated go.mod --- backend/go.mod | 2 +- backend/go.sum | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/go.mod b/backend/go.mod index 39860256a..57a11f7ee 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -4,7 +4,7 @@ go 1.16 require ( github.com/apex/log v1.9.0 - github.com/crawlab-team/crawlab-core v0.6.0-beta.20211113.2050 + github.com/crawlab-team/crawlab-core v0.6.0-beta.20211120.1131 github.com/crawlab-team/go-trace v0.1.0 github.com/gin-gonic/gin v1.6.3 github.com/spf13/cobra v1.1.3 diff --git a/backend/go.sum b/backend/go.sum index e47485a81..37ec87a96 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -68,8 +68,8 @@ github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfc github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/crawlab-team/crawlab-core v0.0.1/go.mod h1:6dJHMvrmIJbfYHhYNeGZkGOLEBvur+yGiFzLCRXx92k= -github.com/crawlab-team/crawlab-core v0.6.0-beta.20211113.2050 h1:S58HbKOjErL6jOafBHi/5TvtAsCozwGQ1mBGonIQpHA= -github.com/crawlab-team/crawlab-core v0.6.0-beta.20211113.2050/go.mod h1:FVYKQc+//BZ6eaAAXPsHzVaAKD4fkGytqcoRSvgpK9I= +github.com/crawlab-team/crawlab-core v0.6.0-beta.20211120.1131 h1:iJSe2rugEDpWGMg42LuPQQ2BGPYL2beNozQmWz/81hg= +github.com/crawlab-team/crawlab-core v0.6.0-beta.20211120.1131/go.mod h1:FFs+U99Ow+sk+aREA7yU7JJ30P+nvugUPubgZj9UlsE= github.com/crawlab-team/crawlab-db v0.0.2/go.mod h1:o7o4rbcyAWlFGHg9VS7V7tM/GqRq+N2mnAXO71cZA78= github.com/crawlab-team/crawlab-db v0.1.1 h1:156h2fbbFKXAHs1mxprqRFC8zs2nrdyaG9JKG7patVw= github.com/crawlab-team/crawlab-db v0.1.1/go.mod h1:t0VidSjXKzQgACqNSQV5wusXncFtL6lGEiQTbLfNR04= @@ -80,8 +80,8 @@ github.com/crawlab-team/crawlab-grpc v0.6.0-beta.20211009.1455 h1:jykwiu71Vy+bD4 github.com/crawlab-team/crawlab-grpc v0.6.0-beta.20211009.1455/go.mod h1:W9Yee6xfesxoaqS5K1sF1I1zlH+i6xqwy4lyoBTOdkc= github.com/crawlab-team/crawlab-log v0.1.0 h1:0t+lZEojs3Vqb/bMkk2qs3I+1+XdwKG3pMTfeK5PZWM= github.com/crawlab-team/crawlab-log v0.1.0/go.mod h1:N8nTTKEbr9ZQSlmw0+HNB4ZAMQF4yVMaJLx8YhXvhNo= -github.com/crawlab-team/crawlab-vcs v0.6.0-beta.20211103.2013 h1:kdlyHC4LFz8ANSk7W9HuzJn2HdoVnSR7ddsiYFsNel4= -github.com/crawlab-team/crawlab-vcs v0.6.0-beta.20211103.2013/go.mod h1:G6Hnt/3255QCGHO5Q0xJe1AbJE7m5t65E0v7flRJBJM= +github.com/crawlab-team/crawlab-vcs v0.6.0-beta.20211113.2048 h1:idtg0LELTXVBNVCP8ZAAm76cHtObTC6fyojH3Y+MTSo= +github.com/crawlab-team/crawlab-vcs v0.6.0-beta.20211113.2048/go.mod h1:G6Hnt/3255QCGHO5Q0xJe1AbJE7m5t65E0v7flRJBJM= github.com/crawlab-team/go-trace v0.1.0 h1:uCqfdqNfb+NwqdkQrBkcYfQ9iqGJ76MbPw1wK8n7xGg= github.com/crawlab-team/go-trace v0.1.0/go.mod h1:LcWyn68HoT+d29CHM8L41pFHxsAcBMF1xjqJmWdyFh8= github.com/crawlab-team/goseaweedfs v0.1.6/go.mod h1:u+rwfqb0rnYllTLjCctE/z1Yp+TC8L+CbbWH8E2NstA= From 68a5c6b87d0c620d2f2b55a8b48dedd3de3914fe Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sat, 20 Nov 2021 11:35:39 +0800 Subject: [PATCH 03/11] updated go version to 1.16 --- Dockerfile | 2 +- Dockerfile.cn | 2 +- Dockerfile.local | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index aac69e148..bcdd4f4c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.15 AS backend-build +FROM golang:1.16 AS backend-build WORKDIR /go/src/app COPY ./backend . diff --git a/Dockerfile.cn b/Dockerfile.cn index 0449e5276..4503b94d0 100644 --- a/Dockerfile.cn +++ b/Dockerfile.cn @@ -1,4 +1,4 @@ -FROM golang:1.15 AS backend-build +FROM golang:1.16 AS backend-build WORKDIR /go/src/app COPY ./backend . diff --git a/Dockerfile.local b/Dockerfile.local index 11d60e381..8210afd58 100644 --- a/Dockerfile.local +++ b/Dockerfile.local @@ -1,4 +1,4 @@ -FROM golang:1.15 AS backend-build +FROM golang:1.16 AS backend-build WORKDIR /go/src/app COPY ./backend . From 907897f288e40bc5c8d9856f10aa6a8382a901a0 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sat, 20 Nov 2021 17:54:57 +0800 Subject: [PATCH 04/11] updated api endpoint --- frontend/public/index.html | 3 +++ frontend/src/main.ts | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/frontend/public/index.html b/frontend/public/index.html index e71e52563..1196d73a3 100644 --- a/frontend/public/index.html +++ b/frontend/public/index.html @@ -7,6 +7,9 @@ <%= htmlWebpackPlugin.options.title %> +