Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

您好,在部署“使用 Seldon Core 启动模型服务”时遇到问题 #185

Open
VincentWei2021 opened this issue Aug 26, 2021 · 7 comments

Comments

@VincentWei2021
Copy link

{"level":"info","ts":1629958732.6136754,"logger":"controllers.SeldonDeployment","msg":"Scheme","SeldonDeployment":"kleveross-system/onnx-service-test002","r.scheme":{}}
{"level":"info","ts":1629958732.6136875,"logger":"controllers.SeldonDeployment","msg":"createDeployments","SeldonDeployment":"kleveross-system/onnx-service-test002","deploy":{"namespace":"kleveross-system","name":"onnx-service-test002-onnx-0-onnx"}}
{"level":"info","ts":1629958732.6138568,"logger":"controllers.SeldonDeployment","msg":"Updating Deployment","SeldonDeployment":"kleveross-system/onnx-service-test002","namespace":"kleveross-system","name":"onnx-service-test002-onnx-0-onnx"}
{"level":"info","ts":1629958732.6246343,"logger":"controllers.SeldonDeployment","msg":"The deployments are the same - api server defaults ignored","SeldonDeployment":"kleveross-system/onnx-service-test002"}
{"level":"info","ts":1629958732.6246703,"logger":"controllers.SeldonDeployment","msg":"Found identical deployment","SeldonDeployment":"kleveross-system/onnx-service-test002","namespace":"kleveross-system","name":"onnx-service-test002-onnx-0-onnx","status":{"observedGeneration":1,"replicas":1,"updatedReplicas":1,"unavailableReplicas":1,"conditions":[{"type":"Available","status":"False","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"MinimumReplicasUnavailable","message":"Deployment does not have minimum availability."},{"type":"Progressing","status":"True","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"ReplicaSetUpdated","message":"ReplicaSet \"onnx-service-test002-onnx-0-onnx-85465b6bf8\" is progressing."}]}}
{"level":"info","ts":1629958732.6247447,"logger":"controllers.SeldonDeployment","msg":"Deployment status ","SeldonDeployment":"kleveross-system/onnx-service-test002","name":"onnx-service-test002-onnx-0-onnx","status":{"observedGeneration":1,"replicas":1,"updatedReplicas":1,"unavailableReplicas":1,"conditions":[{"type":"Available","status":"False","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"MinimumReplicasUnavailable","message":"Deployment does not have minimum availability."},{"type":"Progressing","status":"True","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"ReplicaSetUpdated","message":"ReplicaSet \"onnx-service-test002-onnx-0-onnx-85465b6bf8\" is progressing."}]}}
{"level":"info","ts":1629958732.6375253,"logger":"controllers.SeldonDeployment","msg":"Reconcile called","SeldonDeployment":"kleveross-system/onnx-service-test002"}
{"level":"info","ts":1629958732.637599,"logger":"seldondeployment","msg":"Defaulting Seldon Deployment called","name":"onnx-service-test002"}
{"level":"info","ts":1629958732.637626,"logger":"controllers.SeldonDeployment","msg":"pSvcName","SeldonDeployment":"kleveross-system/onnx-service-test002","val":"onnx-service-test002-onnx"}
{"level":"info","ts":1629958732.6378431,"logger":"controllers.SeldonDeployment","msg":"Found identical Service","SeldonDeployment":"kleveross-system/onnx-service-test002","all":false,"namespace":"kleveross-system","name":"onnx-service-test002-onnx-onnx","status":{"loadBalancer":{}}}
{"level":"info","ts":1629958732.6379495,"logger":"controllers.SeldonDeployment","msg":"Found identical Service","SeldonDeployment":"kleveross-system/onnx-service-test002","all":false,"namespace":"kleveross-system","name":"onnx-service-test002-onnx","status":{"loadBalancer":{}}}
{"level":"info","ts":1629958732.6380079,"logger":"controllers.SeldonDeployment","msg":"Scheme","SeldonDeployment":"kleveross-system/onnx-service-test002","r.scheme":{}}
{"level":"info","ts":1629958732.638027,"logger":"controllers.SeldonDeployment","msg":"createDeployments","SeldonDeployment":"kleveross-system/onnx-service-test002","deploy":{"namespace":"kleveross-system","name":"onnx-service-test002-onnx-0-onnx"}}
{"level":"info","ts":1629958732.6382554,"logger":"controllers.SeldonDeployment","msg":"Updating Deployment","SeldonDeployment":"kleveross-system/onnx-service-test002","namespace":"kleveross-system","name":"onnx-service-test002-onnx-0-onnx"}
{"level":"info","ts":1629958732.6485512,"logger":"controllers.SeldonDeployment","msg":"The deployments are the same - api server defaults ignored","SeldonDeployment":"kleveross-system/onnx-service-test002"}
{"level":"info","ts":1629958732.6485822,"logger":"controllers.SeldonDeployment","msg":"Found identical deployment","SeldonDeployment":"kleveross-system/onnx-service-test002","namespace":"kleveross-system","name":"onnx-service-test002-onnx-0-onnx","status":{"observedGeneration":1,"replicas":1,"updatedReplicas":1,"unavailableReplicas":1,"conditions":[{"type":"Available","status":"False","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"MinimumReplicasUnavailable","message":"Deployment does not have minimum availability."},{"type":"Progressing","status":"True","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"ReplicaSetUpdated","message":"ReplicaSet \"onnx-service-test002-onnx-0-onnx-85465b6bf8\" is progressing."}]}}
{"level":"info","ts":1629958732.6486323,"logger":"controllers.SeldonDeployment","msg":"Deployment status ","SeldonDeployment":"kleveross-system/onnx-service-test002","name":"onnx-service-test002-onnx-0-onnx","status":{"observedGeneration":1,"replicas":1,"updatedReplicas":1,"unavailableReplicas":1,"conditions":[{"type":"Available","status":"False","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"MinimumReplicasUnavailable","message":"Deployment does not have minimum availability."},{"type":"Progressing","status":"True","lastUpdateTime":"2021-08-26T06:18:51Z","lastTransitionTime":"2021-08-26T06:18:51Z","reason":"ReplicaSetUpdated","message":"ReplicaSet \"onnx-service-test002-onnx-0-onnx-85465b6bf8\" is progressing."}]}}

@gaocegege
Copy link
Member

Could you please share the output of kubectl describe pod <target pod>?

@VincentWei2021
Copy link
Author

[root@k8s-master ~]# kubectl describe pod -n seldon-system seldon-controller-manager-6757ccd99-792mb
Name: seldon-controller-manager-6757ccd99-792mb
Namespace: seldon-system
Priority: 0
Node: k8s-master/192.168.100.48
Start Time: Thu, 26 Aug 2021 07:29:53 +0000
Labels: app=seldon
app.kubernetes.io/instance=seldon1
app.kubernetes.io/name=seldon
app.kubernetes.io/version=v0.5
control-plane=seldon-controller-manager
pod-template-hash=6757ccd99
Annotations: prometheus.io/scrape: true
sidecar.istio.io/inject: false
Status: Running
IP: 10.244.0.181
IPs:
IP: 10.244.0.181
Controlled By: ReplicaSet/seldon-controller-manager-6757ccd99
Containers:
manager:
Container ID: docker://47eae4b2a7a32104217cde505d693cb0a68fbdbf557e07630fc041ed765be11d
Image: ghcr.io/kleveross/seldon-core-operator:v1.5.0-alpha.3
Image ID: docker-pullable://ghcr.io/kleveross/seldon-core-operator@sha256:6d305105b68c4f86fe66b6eafeed07132976512ff60944cb29df489b028dfb25
Ports: 4443/TCP, 8080/TCP
Host Ports: 0/TCP, 0/TCP
Command:
/manager
Args:
--enable-leader-election
--webhook-port=4443
--create-resources=$(MANAGER_CREATE_RESOURCES)
--log-level=$(MANAGER_LOG_LEVEL)

State:          Running
  Started:      Thu, 26 Aug 2021 07:29:55 +0000
Ready:          True
Restart Count:  0
Limits:
  cpu:     500m
  memory:  300Mi
Requests:
  cpu:     100m
  memory:  200Mi
Environment:
  MANAGER_LOG_LEVEL:                            INFO
  WATCH_NAMESPACE:                              
  RELATED_IMAGE_EXECUTOR:                       
  RELATED_IMAGE_ENGINE:                         
  RELATED_IMAGE_STORAGE_INITIALIZER:            
  RELATED_IMAGE_SKLEARNSERVER:                  
  RELATED_IMAGE_XGBOOSTSERVER:                  
  RELATED_IMAGE_MLFLOWSERVER:                   
  RELATED_IMAGE_TFPROXY:                        
  RELATED_IMAGE_TENSORFLOW:                     
  RELATED_IMAGE_EXPLAINER:                      
  RELATED_IMAGE_MOCK_CLASSIFIER:                
  MANAGER_CREATE_RESOURCES:                     false
  POD_NAMESPACE:                                seldon-system (v1:metadata.namespace)
  CONTROLLER_ID:                                
  AMBASSADOR_ENABLED:                           false
  AMBASSADOR_SINGLE_NAMESPACE:                  false
  ENGINE_CONTAINER_IMAGE_AND_VERSION:           docker.io/seldonio/engine:1.10.0
  ENGINE_CONTAINER_IMAGE_PULL_POLICY:           IfNotPresent
  ENGINE_CONTAINER_SERVICE_ACCOUNT_NAME:        default
  ENGINE_CONTAINER_USER:                        8888
  ENGINE_LOG_MESSAGES_EXTERNALLY:               false
  PREDICTIVE_UNIT_HTTP_SERVICE_PORT:            9000
  PREDICTIVE_UNIT_GRPC_SERVICE_PORT:            9500
  PREDICTIVE_UNIT_DEFAULT_ENV_SECRET_REF_NAME:  
  PREDICTIVE_UNIT_METRICS_PORT_NAME:            metrics
  ENGINE_SERVER_GRPC_PORT:                      5001
  ENGINE_SERVER_PORT:                           8000
  ENGINE_PROMETHEUS_PATH:                       /prometheus
  ISTIO_ENABLED:                                true
  KEDA_ENABLED:                                 false
  ISTIO_GATEWAY:                                istio-system/kleveross-gateway
  ISTIO_TLS_MODE:                               
  USE_EXECUTOR:                                 true
  EXECUTOR_CONTAINER_IMAGE_AND_VERSION:         docker.io/seldonio/seldon-core-executor:1.10.0
  EXECUTOR_CONTAINER_IMAGE_PULL_POLICY:         IfNotPresent
  EXECUTOR_PROMETHEUS_PATH:                     /prometheus
  EXECUTOR_SERVER_PORT:                         8000
  EXECUTOR_CONTAINER_USER:                      8888
  EXECUTOR_CONTAINER_SERVICE_ACCOUNT_NAME:      default
  EXECUTOR_SERVER_METRICS_PORT_NAME:            metrics
  EXECUTOR_REQUEST_LOGGER_DEFAULT_ENDPOINT:     http://default-broker
  DEFAULT_USER_ID:                              0
  EXECUTOR_DEFAULT_CPU_REQUEST:                 500m
  EXECUTOR_DEFAULT_MEMORY_REQUEST:              512Mi
  EXECUTOR_DEFAULT_CPU_LIMIT:                   500m
  EXECUTOR_DEFAULT_MEMORY_LIMIT:                512Mi
  ENGINE_DEFAULT_CPU_REQUEST:                   500m
  ENGINE_DEFAULT_MEMORY_REQUEST:                512Mi
  ENGINE_DEFAULT_CPU_LIMIT:                     500m
  ENGINE_DEFAULT_MEMORY_LIMIT:                  512Mi
Mounts:
  /tmp/k8s-webhook-server/serving-certs from cert (ro)
  /var/run/secrets/kubernetes.io/serviceaccount from seldon-manager-token-m6nl4 (ro)

Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
cert:
Type: Secret (a volume populated by a Secret)
SecretName: seldon-webhook-server-cert
Optional: false
seldon-manager-token-m6nl4:
Type: Secret (a volume populated by a Secret)
SecretName: seldon-manager-token-m6nl4
Optional: false
QoS Class: Burstable
Node-Selectors:
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message


Normal Scheduled 96s default-scheduler Successfully assigned seldon-system/seldon-controller-manager-6757ccd99-792mb to k8s-master
Normal Pulled 95s kubelet Container image "ghcr.io/kleveross/seldon-core-operator:v1.5.0-alpha.3" already present on machine
Normal Created 94s kubelet Created container manager
Normal Started 93s kubelet Started container manager

@gaocegege
Copy link
Member

Sorry, I do not mean the seldon controller manager, I mean the model server pod.

@VincentWei2021
Copy link
Author

model服务pod没起来

@VincentWei2021
Copy link
Author

我现在换了 seldon-controller-manager 的版本后,出现了下面的 panic:
2021/08/26 07:36:18 http: panic serving 192.168.100.48:31324: runtime error: invalid memory address or nil pointer dereference
goroutine 1429 [running]:
net/http.(*conn).serve.func1(0xc0004739a0)
/usr/local/go/src/net/http/server.go:1800 +0x139
panic(0x1736ec0, 0x28002c0)
/usr/local/go/src/runtime/panic.go:975 +0x3e3
github.com/seldonio/seldon-core/operator/apis/machinelearning.seldon.io/v1.(*SeldonDeploymentSpec).checkPredictiveUnits(0xc000df2518, 0xc0005da6a0, 0xc0005da690, 0xc000f305d0, 0x0, 0x0, 0x0, 0xc0006b5ce0, 0xc00009a100, 0x901)
/workspace/apis/machinelearning.seldon.io/v1/seldondeployment_webhook.go:338 +0x40
github.com/seldonio/seldon-core/operator/apis/machinelearning.seldon.io/v1.(*SeldonDeploymentSpec).ValidateSeldonDeployment(0xc000df2518, 0x1949373, 0x27)
/workspace/apis/machinelearning.seldon.io/v1/seldondeployment_webhook.go:524 +0x590
github.com/seldonio/seldon-core/operator/apis/machinelearning.seldon.io/v1.(*SeldonDeployment).ValidateCreate(0xc000df2400, 0xc00062b0b0, 0x24)
/workspace/apis/machinelearning.seldon.io/v1/seldondeployment_webhook.go:576 +0xde
sigs.k8s.io/controller-runtime/pkg/webhook/admission.(*validatingHandler).Handle(0xc000335fa0, 0x1b9e5c0, 0xc001494e00, 0xc00062b0b0, 0x24, 0xc00098b380, 0x19, 0xc001158418, 0x2, 0xc001158420, ...)
/go/pkg/mod/sigs.k8s.io/[email protected]/pkg/webhook/admission/validator.go:69 +0xa74
sigs.k8s.io/controller-runtime/pkg/webhook/admission.(*Webhook).Handle(0xc00011e450, 0x1b9e5c0, 0xc001494e00, 0xc00062b0b0, 0x24, 0xc00098b380, 0x19, 0xc001158418, 0x2, 0xc001158420, ...)
/go/pkg/mod/sigs.k8s.io/[email protected]/pkg/webhook/admission/webhook.go:135 +0xb3
sigs.k8s.io/controller-runtime/pkg/webhook/admission.(*Webhook).ServeHTTP(0xc00011e450, 0x7f2460700018, 0xc001480690, 0xc00067e000)
/go/pkg/mod/sigs.k8s.io/[email protected]/pkg/webhook/admission/http.go:87 +0xb61
github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerInFlight.func1(0x7f2460700018, 0xc001480690, 0xc00067e000)
/go/pkg/mod/github.com/prometheus/[email protected]/prometheus/promhttp/instrument_server.go:40 +0xab
net/http.HandlerFunc.ServeHTTP(0xc00011e6f0, 0x7f2460700018, 0xc001480690, 0xc00067e000)
/usr/local/go/src/net/http/server.go:2041 +0x44
github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerCounter.func1(0x1b99940, 0xc000cde000, 0xc00067e000)
/go/pkg/mod/github.com/prometheus/[email protected]/prometheus/promhttp/instrument_server.go:100 +0xda
net/http.HandlerFunc.ServeHTTP(0xc00011e840, 0x1b99940, 0xc000cde000, 0xc00067e000)
/usr/local/go/src/net/http/server.go:2041 +0x44
github.com/prometheus/client_golang/prometheus/promhttp.InstrumentHandlerDuration.func2(0x1b99940, 0xc000cde000, 0xc00067e000)
/go/pkg/mod/github.com/prometheus/[email protected]/prometheus/promhttp/instrument_server.go:76 +0xb2
net/http.HandlerFunc.ServeHTTP(0xc00011e930, 0x1b99940, 0xc000cde000, 0xc00067e000)
/usr/local/go/src/net/http/server.go:2041 +0x44
net/http.(*ServeMux).ServeHTTP(0xc000993700, 0x1b99940, 0xc000cde000, 0xc00067e000)
/usr/local/go/src/net/http/server.go:2416 +0x1a5
net/http.serverHandler.ServeHTTP(0xc0009b1ea0, 0x1b99940, 0xc000cde000, 0xc00067e000)
/usr/local/go/src/net/http/server.go:2836 +0xa3
net/http.(*conn).serve(0xc0004739a0, 0x1b9e5c0, 0xc001494d00)
/usr/local/go/src/net/http/server.go:1924 +0x86c
created by net/http.(*Server).Serve
/usr/local/go/src/net/http/server.go:2962 +0x35c

@VincentWei2021
Copy link
Author

http: panic serving 192.168.100.48:31324: runtime error: invalid memory address or nil pointer dereference

@gaocegege
Copy link
Member

看起来是 seldon 这边出现了 nil pointer dereference(空指针异常),后来有定位到原因么?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants