Skip to content

Commit

Permalink
[INLONG-8251][Agent] Add global memory limit for file collect (apache…
Browse files Browse the repository at this point in the history
  • Loading branch information
justinwwhuang authored Jun 16, 2023
1 parent 87d2613 commit 32e29b8
Show file tree
Hide file tree
Showing 19 changed files with 492 additions and 237 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,14 @@ public class CommonConstants {
// max size of single batch in bytes, default is 800KB.
public static final int DEFAULT_PROXY_PACKAGE_MAX_SIZE = 800000;

public static final String PROXY_MESSAGE_SEMAPHORE = "proxy.semaphore";
public static final int DEFAULT_PROXY_MESSAGE_SEMAPHORE = 20000;

public static final String PROXY_INLONG_STREAM_ID_QUEUE_MAX_NUMBER = "proxy.group.queue.maxNumber";
public static final int DEFAULT_PROXY_INLONG_STREAM_ID_QUEUE_MAX_NUMBER = 10000;

public static final String PROXY_PACKAGE_MAX_TIMEOUT_MS = "proxy.package.maxTimeout.ms";
public static final int DEFAULT_PROXY_PACKAGE_MAX_TIMEOUT_MS = 4 * 1000;

public static final String PROXY_BATCH_FLUSH_INTERVAL = "proxy.batch.flush.interval";
public static final int DEFAULT_PROXY_BATCH_FLUSH_INTERVAL = 1000;
public static final int DEFAULT_PROXY_BATCH_FLUSH_INTERVAL = 100;

public static final String PROXY_SENDER_MAX_TIMEOUT = "proxy.sender.maxTimeout";
// max timeout in seconds.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,16 @@ public class FetcherConstants {

public static final String AGENT_MANAGER_AUTH_SECRET_ID = "agent.manager.auth.secretId";
public static final String AGENT_MANAGER_AUTH_SECRET_KEY = "agent.manager.auth.secretKey";

public static final String AGENT_GLOBAL_READER_SOURCE_PERMIT = "agent.global.reader.source.permit";
public static final int DEFAULT_AGENT_GLOBAL_READER_SOURCE_PERMIT = 16 * 1000 * 1000;

public static final String AGENT_GLOBAL_READER_QUEUE_PERMIT = "agent.global.reader.queue.permit";
public static final int DEFAULT_AGENT_GLOBAL_READER_QUEUE_PERMIT = 16 * 1000 * 1000;

public static final String AGENT_GLOBAL_CHANNEL_PERMIT = "agent.global.channel.permit";
public static final int DEFAULT_AGENT_GLOBAL_CHANNEL_PERMIT = 16 * 1000 * 1000;

public static final String AGENT_GLOBAL_WRITER_PERMIT = "agent.global.writer.permit";
public static final int DEFAULT_AGENT_GLOBAL_WRITER_PERMIT = 96 * 1000 * 1000;
}
Original file line number Diff line number Diff line change
Expand Up @@ -145,19 +145,18 @@ public BatchProxyMessage fetchBatch() {
// pre check message size
ProxyMessage peekMessage = messageQueue.peek();
int peekMessageLength = peekMessage.getBody().length;
if (resultBatchSize + peekMessageLength > maxPackSize) {
break;
}
ProxyMessage message = messageQueue.remove();
int bodySize = message.getBody().length;
if (peekMessageLength > maxPackSize) {
LOGGER.warn("message size is {}, greater than max pack size {}, drop it!",
peekMessage.getBody().length, maxPackSize);
int bodySize = peekMessage.getBody().length;
queueSize.addAndGet(-bodySize);
messageQueue.remove();
break;
}
if (resultBatchSize + peekMessageLength > maxPackSize) {
break;
}
ProxyMessage message = messageQueue.remove();
int bodySize = message.getBody().length;
resultBatchSize += bodySize;
// decrease queue size.
queueSize.addAndGet(-bodySize);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
import org.apache.inlong.agent.constant.AgentConstants;
import org.apache.inlong.agent.core.conf.ConfigJetty;
import org.apache.inlong.agent.core.job.JobManager;
import org.apache.inlong.agent.core.task.PositionManager;
import org.apache.inlong.agent.core.task.TaskManager;
import org.apache.inlong.agent.core.task.TaskPositionManager;
import org.apache.inlong.agent.core.trigger.TriggerManager;
import org.apache.inlong.agent.db.CommandDb;
import org.apache.inlong.agent.db.Db;
Expand Down Expand Up @@ -57,7 +57,7 @@ public class AgentManager extends AbstractDaemon {
private final JobManager jobManager;
private final TaskManager taskManager;
private final TriggerManager triggerManager;
private final TaskPositionManager taskPositionManager;
private final PositionManager positionManager;
private final HeartbeatManager heartbeatManager;
private final ProfileFetcher fetcher;
private final AgentConfiguration conf;
Expand All @@ -82,7 +82,7 @@ public AgentManager() {
taskManager = new TaskManager(this);
fetcher = initFetcher(this);
heartbeatManager = HeartbeatManager.getInstance(this);
taskPositionManager = TaskPositionManager.getInstance(this);
positionManager = PositionManager.getInstance(this);
// need to be an option.
if (conf.getBoolean(
AgentConstants.AGENT_ENABLE_HTTP, AgentConstants.DEFAULT_AGENT_ENABLE_HTTP)) {
Expand Down Expand Up @@ -174,8 +174,8 @@ public TriggerManager getTriggerManager() {
return triggerManager;
}

public TaskPositionManager getTaskPositionManager() {
return taskPositionManager;
public PositionManager getTaskPositionManager() {
return positionManager;
}

public TaskManager getTaskManager() {
Expand Down Expand Up @@ -206,7 +206,7 @@ public void start() throws Exception {
LOGGER.info("starting heartbeat manager");
heartbeatManager.start();
LOGGER.info("starting task position manager");
taskPositionManager.start();
positionManager.start();
LOGGER.info("starting read job from local");
// read job profiles from local
List<JobProfile> profileList = localProfile.readFromLocal();
Expand Down Expand Up @@ -249,7 +249,7 @@ public void stop() throws Exception {
jobManager.stop();
taskManager.stop();
heartbeatManager.stop();
taskPositionManager.stop();
positionManager.stop();
agentConfMonitor.shutdown();
this.db.close();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.inlong.agent.core.job.Job;
import org.apache.inlong.agent.core.job.JobManager;
import org.apache.inlong.agent.core.job.JobWrapper;
import org.apache.inlong.agent.core.task.MemoryManager;
import org.apache.inlong.agent.state.State;
import org.apache.inlong.agent.utils.AgentUtils;
import org.apache.inlong.agent.utils.HttpManager;
Expand Down Expand Up @@ -69,6 +70,7 @@
public class HeartbeatManager extends AbstractDaemon implements AbstractHeartbeatManager {

private static final Logger LOGGER = LoggerFactory.getLogger(HeartbeatManager.class);
public static final int PRINT_MEMORY_PERMIT_INTERVAL_SECOND = 60;
private static HeartbeatManager heartbeatManager = null;
private final JobManager jobmanager;
private final AgentConfiguration conf;
Expand Down Expand Up @@ -122,6 +124,16 @@ public static HeartbeatManager getInstance() {
public void start() throws Exception {
// Hand each background loop to submitWorker (defined outside this view), in this order:
// snapshot reporting, heartbeat reporting, then the periodic memory-permit printer.
submitWorker(snapshotReportThread());
submitWorker(heartbeatReportThread());
// Worker that repeatedly calls MemoryManager.getInstance().printAll() while this daemon is runnable.
submitWorker(printMemoryPermitThread());
}

/**
 * Builds the worker that periodically prints the state of all global memory permits.
 *
 * @return a task that keeps printing until {@code isRunnable()} returns false
 */
private Runnable printMemoryPermitThread() {
    return this::printMemoryPermitsWhileRunnable;
}

/**
 * Prints every memory permit through {@link MemoryManager#printAll()}, then calls
 * {@code AgentUtils.silenceSleepInSeconds} with {@code PRINT_MEMORY_PERMIT_INTERVAL_SECOND},
 * repeating for as long as {@code isRunnable()} holds.
 */
private void printMemoryPermitsWhileRunnable() {
    while (isRunnable()) {
        MemoryManager.getInstance().printAll();
        AgentUtils.silenceSleepInSeconds(PRINT_MEMORY_PERMIT_INTERVAL_SECOND);
    }
}

private Runnable snapshotReportThread() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public JobManager(AgentManager agentManager, JobProfileDb jobProfileDb) {
this.dimensions = new HashMap<>();
this.dimensions.put(KEY_COMPONENT_NAME, this.getClass().getSimpleName());
this.jobMetrics = new AgentMetricItemSet(this.getClass().getSimpleName());
MetricRegister.unregister(jobMetrics);
MetricRegister.register(jobMetrics);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.agent.core.task;

import org.apache.inlong.agent.conf.AgentConfiguration;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;

import static org.apache.inlong.agent.constant.FetcherConstants.AGENT_GLOBAL_CHANNEL_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.AGENT_GLOBAL_READER_QUEUE_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.AGENT_GLOBAL_READER_SOURCE_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.AGENT_GLOBAL_WRITER_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.DEFAULT_AGENT_GLOBAL_CHANNEL_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.DEFAULT_AGENT_GLOBAL_READER_QUEUE_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.DEFAULT_AGENT_GLOBAL_READER_SOURCE_PERMIT;
import static org.apache.inlong.agent.constant.FetcherConstants.DEFAULT_AGENT_GLOBAL_WRITER_PERMIT;

/**
 * Used to limit global memory to avoid OOM.
 *
 * <p>Holds one {@link Semaphore} per global permit key ({@code AGENT_GLOBAL_READER_SOURCE_PERMIT},
 * {@code AGENT_GLOBAL_READER_QUEUE_PERMIT}, {@code AGENT_GLOBAL_CHANNEL_PERMIT} and
 * {@code AGENT_GLOBAL_WRITER_PERMIT}). Each semaphore is sized from the agent configuration and
 * falls back to the matching {@code DEFAULT_*} constant. Callers address a semaphore by its
 * configuration key.
 */
public class MemoryManager {

    private static final Logger LOGGER = LoggerFactory.getLogger(MemoryManager.class);
    private static volatile MemoryManager memoryManager = null;
    private final AgentConfiguration conf;
    private final ConcurrentHashMap<String, Semaphore> semaphoreMap = new ConcurrentHashMap<>();

    private MemoryManager() {
        this.conf = AgentConfiguration.getAgentConf();
        registerSemaphore(AGENT_GLOBAL_READER_SOURCE_PERMIT, DEFAULT_AGENT_GLOBAL_READER_SOURCE_PERMIT);
        registerSemaphore(AGENT_GLOBAL_READER_QUEUE_PERMIT, DEFAULT_AGENT_GLOBAL_READER_QUEUE_PERMIT);
        registerSemaphore(AGENT_GLOBAL_CHANNEL_PERMIT, DEFAULT_AGENT_GLOBAL_CHANNEL_PERMIT);
        registerSemaphore(AGENT_GLOBAL_WRITER_PERMIT, DEFAULT_AGENT_GLOBAL_WRITER_PERMIT);
    }

    /**
     * Creates the semaphore for one permit key, sized from the configuration.
     *
     * @param semaphoreName configuration key, also used as the lookup name of the semaphore
     * @param defaultPermit permit count used when the key is not configured
     */
    private void registerSemaphore(String semaphoreName, int defaultPermit) {
        semaphoreMap.put(semaphoreName, new Semaphore(conf.getInt(semaphoreName, defaultPermit)));
    }

    /**
     * manager singleton
     *
     * @return the shared instance, created on first use
     */
    public static MemoryManager getInstance() {
        if (memoryManager == null) {
            synchronized (MemoryManager.class) {
                // re-check under the lock so only one instance is ever created
                if (memoryManager == null) {
                    memoryManager = new MemoryManager();
                }
            }
        }
        return memoryManager;
    }

    /**
     * Tries to take permits from the named semaphore without blocking.
     *
     * @param semaphoreName key of the semaphore to acquire from
     * @param permit number of permits to acquire
     * @return true if the permits were acquired; false if they are not currently available
     *         or no semaphore is registered under {@code semaphoreName}
     */
    public boolean tryAcquire(String semaphoreName, int permit) {
        Semaphore semaphore = semaphoreMap.get(semaphoreName);
        if (semaphore == null) {
            // pass the name so the {} placeholder is actually filled in
            LOGGER.error("tryAcquire {} not exist", semaphoreName);
            return false;
        }
        return semaphore.tryAcquire(permit);
    }

    /**
     * Returns permits to the named semaphore; logs an error and does nothing if the name is unknown.
     *
     * @param semaphoreName key of the semaphore to release to
     * @param permit number of permits to release
     */
    public void release(String semaphoreName, int permit) {
        Semaphore semaphore = semaphoreMap.get(semaphoreName);
        if (semaphore == null) {
            LOGGER.error("release {} not exist", semaphoreName);
            return;
        }
        semaphore.release(permit);
    }

    /**
     * Logs the available permits and the waiting-thread estimate of the named semaphore.
     *
     * @param semaphoreName key of the semaphore to report on
     */
    public void printDetail(String semaphoreName) {
        Semaphore semaphore = semaphoreMap.get(semaphoreName);
        if (semaphore == null) {
            LOGGER.error("printDetail {} not exist", semaphoreName);
            return;
        }
        LOGGER.info("permit left {} wait {} {}", semaphore.availablePermits(), semaphore.getQueueLength(),
                semaphoreName);
    }

    /**
     * Logs the state of all four global semaphores.
     */
    public void printAll() {
        printDetail(AGENT_GLOBAL_READER_SOURCE_PERMIT);
        printDetail(AGENT_GLOBAL_READER_QUEUE_PERMIT);
        printDetail(AGENT_GLOBAL_CHANNEL_PERMIT);
        printDetail(AGENT_GLOBAL_WRITER_PERMIT);
    }
}
Loading

0 comments on commit 32e29b8

Please sign in to comment.