温控daemon（六）Monitor算法

高茂

2023-12-01

Monitor算法是温控的常见算法之一，在main函数中调用了thermal_monitor函数。Monitor算法是一种静态算法：当温度超过某一档设置的触发温度（lvl_trig）时就会执行该档对应的限制动作（例如调频），当温度低于对应的clr温度（lvl_clr）时就会解除限制。

1. thermal_monitor函数

thermal_monitor函数先是从dev_list中获取了各个device_info放入device_info_arr中，然后过滤setting放入tm_states的setting中，然后执行了sensors_setup，并创建了一个thread执行函数sensor_monitor来监控是否触发monitor算法。

void thermal_monitor(struct thermal_setting_t *settings)
{
	struct setting_info *cfg_setting;
	union device_request req;

	/* Build Device Info List */
	if (devices_manager_get_list(NULL, &device_info_arr_len)) {//先获取dev_list的长度
		msg("Failed to get device list length\n");
		return;
	}

	device_info_arr = (struct device_info *)//根据device的长度 malloc
		malloc(sizeof(struct device_info)*device_info_arr_len);

	if (device_info_arr == NULL) {
		msg("Failed to alloc device_info_arr\n");
		return;
	}

	if (devices_manager_get_list(device_info_arr,//得到device_info
				     &device_info_arr_len)) {
		msg("Failed to get device list\n");
		free(device_info_arr);
		return;
	}

	cfg_setting = settings->list;

	while (cfg_setting && (tm_cnt < MAX_TM_INSTANCES_SUPPORTED)) {
		if ((cfg_setting->algo_type != MONITOR_ALGO_TYPE) ||//不是Monitor continue
		    (cfg_setting->err_disable != 0)) {//坏的setting continue
			cfg_setting = cfg_setting->next;
			continue;
		}

		dbgmsg("%s: Import %s", __func__, cfg_setting->desc);
		tm_states[tm_cnt].setting = cfg_setting;//放入tm_states数组的setting
		tm_states[tm_cnt].disable = cfg_setting->disable;
		tm_cnt++;

		if (!cfg_setting->disable)//没有disable 打印
			print_setting(cfg_setting);

		/* KEEP at end of while block */
		cfg_setting = cfg_setting->next;
	}

	if (!sensors_setup()) {//
		msg("Failed to setup at least one sensor for monitoring\n");
		return;
	}

	/* Vote to keep kernel mitigation enabled until thermal monitor has
	   processed initial thresholds. */
	kernel_dev = devices_manager_reg_clnt("kernel");
	if (kernel_dev == NULL) {
		msg("%s Failed to create kernel device handle\n", __func__);
		return;
	}
	req.value = 1;
	device_clnt_request(kernel_dev, &req);//继续keep KTM

	if (pthread_create(&tm_thread, NULL, (void *)&sensor_monitor,//sensor_monitor监控
			   (void *)NULL) != 0) {
		msg("Error initializing thermal monitor\n");
		device_clnt_cancel_request(kernel_dev);//失败就取消之前keep KTM的命令
	}
}

我们来看下devices_manager_get_list 函数，配合上面thermal_monitor函数，第一次传进来的info_arr为NULL，因此获取了device的长度，第二次再从dev_list中获取dev_info放入dev_info_arr.

/*
 * Report the number of registered devices, or copy their descriptors out.
 *
 * info_arr:     destination array, or NULL to only query the device count.
 * info_arr_len: in: capacity of info_arr (ignored when info_arr is NULL);
 *               out: dev_cnt when info_arr is NULL, otherwise
 *               MIN(capacity, dev_cnt).
 *
 * Returns 0 on success, -EINVAL when info_arr_len is NULL.
 */
int devices_manager_get_list(struct device_info *info_arr, uint32_t *info_arr_len)
{
	uint32_t dev_idx;
	struct devices_manager_dev *curr = dev_list;

	/* The length pointer is dereferenced on every path below, so it must be
	   valid whether or not an output array was supplied. */
	if (info_arr_len == NULL) {
		msg("%s: Invalid args.\n", __func__);
		return -(EINVAL);
	}

	if (info_arr == NULL) {
		/* Interpret as request for number of dev's present. */
		*info_arr_len = dev_cnt;
		return 0;
	}

	/* Don't exceed end of info_array */
	*info_arr_len = MIN(*info_arr_len, dev_cnt);

	/* Walk dev_list and copy one descriptor per node. */
	for (dev_idx = 0; (dev_idx < *info_arr_len) && (curr != NULL); dev_idx++) {
		memcpy(&(info_arr[dev_idx]), &(curr->dev_info),
		       sizeof(struct device_info));
		curr = curr->next_dev;
	}

	return 0;
}

2. sensor_setup

下面我们再来看sensors_setup函数，这个函数我们需要结合monitor算法实际的配置来看。

/*
 * Create the sensor client and device clients for every imported monitor
 * instance in tm_states[].
 *
 * Instances whose sensor client or device clients cannot be created are
 * marked disabled. Returns the number of instances set up successfully
 * (0 when no instance was imported).
 */
static int sensors_setup(void)
{
	uint32_t idx;
	int ready = 0;

	if (tm_cnt == 0)
		return 0;

	/* Set up tm instances */
	dbgmsg("%s: tm_cnt %d", __func__, tm_cnt);
	for (idx = 0; idx < tm_cnt; idx++) {
		struct tm_instance_info *inst = &tm_states[idx];
		struct setting_info *cfg = inst->setting;
		struct tm_setting *tm_cfg = &(cfg->data.tm);

		dbgmsg("%s: TM Id %s Sensor %s num_thresholds %d", __func__,
		    cfg->desc, tm_cfg->sensor,
		    tm_cfg->num_thresholds);

		/* Nothing to set up for a setting without thresholds. */
		if (!(tm_cfg->num_thresholds > 0))
			continue;

		/* Create sensor client for the sensor named in the setting. */
		inst->ts_clnt = sensors_manager_reg_clnt(tm_cfg->sensor);
		if (inst->ts_clnt == NULL) {
			msg("%s: Can't create client for %s.\n",
			    __func__, tm_cfg->sensor);
			inst->disable = 1;
			continue;
		}

		/* Create necessary device clients (one per distinct action device). */
		if (create_device_clnts(inst) != 0) {
			inst->disable = 1;
			continue;
		}
		ready++;
	}
	return ready;
}

下面是一个8909的Monitor算法的一部分：

	/* Monitor-algorithm setting bound to sensor "tsens_tz_sensor1": three
	   thresholds, each driving the "camera" and "camcorder" devices. */
	{
		.desc = "CAMERA_CAMCORDER_MONITOR",
		.algo_type = MONITOR_ALGO_TYPE,
		.data.tm = {
			.sensor = "tsens_tz_sensor1",
			.sampling_period_ms = 250,
			.num_thresholds = 3,/* number of valid t[] entries */
			._n_thresholds = 3,
			._n_to_clear = 3,
			._n_actions = 3,
			._n_action_info = 3,
			.t[0] = {
				.lvl_trig = 80000,
				.lvl_clr = 75000,
				.num_actions = 2,/* number of valid actions[] entries */
				.actions[0] = {
					.device = "camera",
					.info = 1,
				},
				.actions[1] = {
					.device = "camcorder",
					.info = 1,
				},
			},
			.t[1] = {
				.lvl_trig = 85000,
				.lvl_clr = 80000,
				.num_actions = 2,
				.actions[0] = {
					.device = "camera",
					.info = 2,
				},
				.actions[1] = {
					.device = "camcorder",
					.info = 2,
				},
			},
			.t[2] = {
				.lvl_trig = 88000,
				.lvl_clr = 85000,
				.num_actions = 2,
				.actions[0] = {
					.device = "camera",
					.info = 10,
				},
				.actions[1] = {
					.device = "camcorder",
					.info = 10,
				},
			}
		},
	},

sensors_manager_reg_clnt函数就是为sensor创建一个client

/*
 * Register a new client on the sensor called sensor_name.
 *
 * The client is zero-initialised and pushed at the head of the sensor's
 * client list while holding ts_clnt_mtx.
 *
 * Returns the client handle, or NULL when the name is NULL, the sensor is
 * not found by find_sensor(), or the allocation fails.
 */
sensor_clnt_handle sensors_manager_reg_clnt(const char *sensor_name)
{
	struct sensors_mgr_sensor_info *owner;
	struct sensor_client_type *new_clnt;

	if (!sensor_name) {
		msg("%s: Invalid args.\n", __func__);
		return NULL;
	}

	/* Look the sensor up by name. */
	owner = find_sensor(sensor_name);
	if (!owner) {
		msg("%s: Invalid sensor %s.\n", __func__, sensor_name);
		return NULL;
	}

	new_clnt = malloc(sizeof(struct sensor_client_type));
	if (!new_clnt) {
		msg("%s: Alloc. failed for %s.\n", __func__, sensor_name);
		return NULL;
	}
	memset(new_clnt, 0x0, sizeof(struct sensor_client_type));

	/* Insert the client at the head of the sensor's list. */
	THERM_MUTEX_LOCK(&ts_clnt_mtx);
	new_clnt->sensor_mgr = owner;
	new_clnt->next_clnt = owner->client_list;
	owner->client_list = new_clnt;
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);

	return new_clnt;
}

我们再来看下create_device_clnts函数

static int create_device_clnts(struct tm_instance_info *tm_instance_info)
{
	int ret_val = 0;
	uint32_t t_idx, a_idx;
	struct tm_devices_list list;//创建一个list
	struct tm_setting *tm_setting_info = &tm_instance_info->setting->data.tm;

	memset(&list, 0x0, sizeof(struct tm_devices_list));
	/* Create list of unique actions */
	for (t_idx = 0; t_idx < tm_setting_info->num_thresholds; t_idx++) {//先遍历num_therosholds就是t的个数
		for (a_idx = 0; a_idx < tm_setting_info->t[t_idx].num_actions;//再遍历每个t下面的num_actions就是action的个数
		     a_idx++) {
			/* Index used by tm to make requests on correct device
			   client */
			tm_setting_info->t[t_idx].actions[a_idx].device_idx  =
				add_device_to_list(tm_instance_info, &list,
						   tm_setting_info->t[t_idx].actions[a_idx].device);
			if (tm_setting_info->t[t_idx].actions[a_idx].device_idx < 0) {
				msg("%s: Error adding device %s\n", __func__,
				    tm_setting_info->t[t_idx].actions[a_idx].device);
				ret_val = -(EFAULT);
				goto error_handler;
			}
		}
	}

error_handler:
	return ret_val;
}

我们再来看add_device_to_list函数，它为每个action的device获取device_info放到tm_instance_info的dev_info_list中，并且创建一个client放在dev_clnt_list中，最后把对应的索引保存在每个action的device_idx中。

/*
 * Find or create the slot for a device name within one monitor instance.
 *
 * list tracks the names that already own a slot. A name that matches an
 * existing entry (case-insensitive, up to DEVICES_MAX_NAME_LEN) reuses that
 * slot; otherwise the first empty slot is taken and the instance's
 * dev_info_list[] / dev_clnt_list[] entries for it are filled in.
 *
 * Returns the slot index, or -1 when there is no room or when the device
 * info / device client cannot be obtained.
 */
static int add_device_to_list(struct tm_instance_info *tm_instance_info,
			      struct tm_devices_list *list, const char *device)
{
	uint32_t slot = 0;

	/* Search for match or first available slot. */
	while (slot < MAX_ACTIONS_PER_TM_INSTANCE &&
	       list->device[slot] != NULL &&
	       strncasecmp(list->device[slot], device,
			   DEVICES_MAX_NAME_LEN) != 0)
		slot++;

	if (slot >= MAX_ACTIONS_PER_TM_INSTANCE) {
		msg("%s: No room for device %s", __func__, device);
		return -1;
	}

	/* Existing entry matched: its client was created earlier. */
	if (list->device[slot] != NULL)
		return (int)slot;

	/* Empty slot: look up the device info, then register a client. */
	tm_instance_info->dev_info_list[slot] = get_device_info(device);
	if (tm_instance_info->dev_info_list[slot] == NULL)
		return -1;

	tm_instance_info->dev_clnt_list[slot] = devices_manager_reg_clnt(device);
	if (tm_instance_info->dev_clnt_list[slot] == NULL)
		return -1;

	list->device[slot] = (char*)device;
	list->cnt++;

	/* The caller stores this index in the action's device_idx. */
	return (int)slot;
}

3. Monitor监控

我们在thermal_monitor中创建了一个thread执行sensor_monitor来监控，这里我们来看下这个函数，这里主要就是循环调用handle_thresh_sig来监控温度，这里有一个重要的地方就是在while循环中会等待condition，直到有其他地方把这个condition broadcast了。

/*
 * Thread body of the monitor algorithm.
 *
 * Marks every monitor instance as pending, evaluates them once, then loops:
 * sleep on wait_cond while no instance is pending, and run
 * handle_thresh_sig() after each wake-up, until exit_daemon becomes 1.
 *
 * data is unused. Always returns NULL.
 */
static void *sensor_monitor(void *data)
{
	uint32_t idx;

	/* Flag every instance so the first pass below evaluates all of them. */
	for (idx = 0; idx < tm_cnt; idx++)
		THRESH_MASK_SET(idx);

	/* Set initial thresholds */
	handle_thresh_sig();//first evaluation

	/* Vote okay to disable kernel mitigation */
	device_clnt_cancel_request(kernel_dev);

	/* Register the request handlers for the thermal server clients. */
	thermal_server_register_client_req_handler("override", override_notify, NULL);
	thermal_server_register_client_req_handler(CONFIG_QUERY_CLIENT, config_query_notify, NULL);
	thermal_server_register_client_req_handler(CONFIG_SET_CLIENT, config_set_notify, NULL);

	while (exit_daemon != 1) {
		dbgmsg("%s: Wait for EV", __func__);
		pthread_mutex_lock(&wait_mutex);
		/* NOTE(review): the predicate is tested with `if`, not `while`, so
		   any wake-up (including a spurious one) proceeds to
		   handle_thresh_sig() even if no mask bit is set. */
		if (!THRESH_MASK_ANY_SET) {
			pthread_cond_wait(&wait_cond, &wait_mutex);//block until wait_cond is signalled
		}
		pthread_mutex_unlock(&wait_mutex);

		dbgmsg("%s: Thresh EVT", __func__);
		handle_thresh_sig();//re-evaluate the pending instances
	}

	free(device_info_arr);
	return NULL;
}

我们来看下handle_thresh_sig函数，先遍历所有的tm_instance_info，然后过滤掉setting下面data.tm的num_thresholds小于1的，以及被disable的tm_instance_info，然后获取温度。

static void handle_thresh_sig(void)
{
	......
	struct tm_instance_info *sensor;
	struct setting_info *info;
	struct tm_setting *tm_info;
	union device_request req;

	/* Get temp and handle */
	for (idx = 0; idx < tm_cnt; idx++) {
		if (THRESH_MASK_IS_SET(idx) == 0) {
			continue;
		}

		sensor = &tm_states[idx];
		info = sensor->setting;
		tm_info = &(info->data.tm);

		if ((tm_info->num_thresholds < 1) ||
		    (sensor->disable)) {//过滤disable以及num_thresholds
			/* Unmask TM instance as handled */
			THRESH_MASK_CLR(idx);
			continue;
		}

		max_thr = (int)tm_info->num_thresholds;
		sensor_temp = sensor_get_temperature(sensor);//获取温度
		dbgmsg("%s: TM Id %s Sensor %s Temp %d\n", __func__,
		       info->desc, tm_info->sensor, sensor_temp);

		lvl_max = -1;
		lvl_min = INT_MAX;
		for (i = max_thr - 1; i >= 0; i--) {//遍历每一个setting下的t数组
			/* Scan for new alarm conditions */
			if (sensor_threshold_trigger(sensor_temp, sensor, i)) {//是否触发阈值了
				if (sensor->lvl_alarm[i] == 0) {//之前没有记录触发
					thermalmsg(LOG_LVL_DBG,
					    (LOG_LOGCAT | LOG_TRACE),
					    "TM Id '%s' Sensor '%s' - alarm "
					    " raised %d at %d.%d degC\n",
					    info->desc, tm_info->sensor, i + 1,
					    RCONV(sensor_temp),
					    (sensor_temp%1000)/100);
					sensor->lvl_alarm[i] = 1;//记录
					alarm_raised = 1;//需要触发
				}
				if (i > lvl_max)
					lvl_max = i;//lvl_max就是记录触发的t数组的最后一个index
			}
			/* Scan for alarm clearing conditions */
			if (sensor_threshold_clear(sensor_temp, sensor, i)) {//是否低于小的阈值了（clr的温度值）
				if (sensor->lvl_alarm[i] == 1) {//之前记录触发了
					thermalmsg(LOG_LVL_DBG,
					    (LOG_LOGCAT | LOG_TRACE),
					    "TM Id '%s' Sensor '%s' - alarm "
					    "cleared %d at %d.%d degC\n",
					    info->desc, tm_info->sensor, i + 1,
					    RCONV(sensor_temp),
					    (sensor_temp%1000)/100);
					sensor->lvl_alarm[i] = 0;//触发的记录清除
					alarm_cleared = 1;//清除
				}
				if (i < lvl_min)
					lvl_min = i;//清除的t数组最小的index
			}
		}

		/* Update temperature thresholds */
		if (alarm_raised) {
			threshold_type = THRESHOLD_CROSS;
			threshold_level = lvl_max + 1;
		} else if (alarm_cleared) {
			threshold_type = THRESHOLD_CLEAR;
			threshold_level = lvl_min;
		} else {
			threshold_type = THRESHOLD_NOCHANGE;
			threshold_level = sensor->last_lvl;
		}
		sensor->last_lvl = threshold_level;

		pthread_mutex_lock(&wait_mutex);
		/* Unmask TM instance as handled */
		THRESH_MASK_CLR(idx);
		pthread_mutex_unlock(&wait_mutex);

		sensor_update_thresholds(sensor, threshold_type,
					 threshold_level, idx);

		if (!alarm_raised && !alarm_cleared) {//没有新的触发或者清除之前continue
			continue;
		}

		/* Perform actions on highest level alarm */
		for (i = max_thr - 1; i >= 0; i--) {//从settings最大的开始
			if (sensor->lvl_alarm[i] == 0)//不满足触发条件的continue
				continue;

			for (j = 0; j < tm_info->t[i].num_actions; j++) {//满足每一个t数组的sensor温度触发，t数组的所有device都执行action
				action_idx = tm_info->t[i].actions[j].device_idx;//获取每个setting的action的device index
				dev_info = sensor->dev_info_list[action_idx];
				if (dev_info == NULL)
					continue;

				action_info = tm_info->t[i].actions[j].info;//调整的值

				sensor->action_mask |= (1U << action_idx);
				req.value = action_info;
				switch(dev_info->dev_type) {//根据类型区分
				......
				case DEVICE_OP_VALUE_TYPE:
				case DEVICE_GENERIC_TYPE:
					device_clnt_request(sensor->dev_clnt_list[action_idx], &req);//每一个device的client申请调整
					break;
				case DEVICE_NONE_TYPE:
					break;
				default:
					msg("Unknown action %s\n", dev_info->name);
				}
			}

			break;
		}

我们来看下获取温度的函数sensor_get_temperature，它是通过sensors_manager_read_trip_temp函数来读取的，传进去的参数是每个tm_instance_info的sensor的client（每一个tm_instance_info都为其sensor创建了一个client）。

/*
 * Read the current temperature for one monitor instance through its sensor
 * client.
 *
 * Returns the value reported by sensors_manager_read_trip_temp(), or
 * -EFAULT when the instance or its sensor client is NULL.
 */
static int sensor_get_temperature(struct tm_instance_info *setting)
{
	int reading;

	if (!setting || !setting->ts_clnt)
		return -EFAULT;

	reading = sensors_manager_read_trip_temp(setting->ts_clnt);
	dbgmsg("TM Id %s Sensor %s Reading %d\n",
	       setting->setting->desc,
	       setting->setting->data.tm.sensor, reading);

	return reading;
}

sensors_manager_read_trip_temp函数还是通过client的sensor来获取温度，sensors_manager_read函数还是通过sensor的get_temperature函数。

/*
 * Read the temperature for a sensor client.
 *
 * Uses the sensor's get_trip_temperature hook when it is set; otherwise
 * defers to sensors_manager_read(). Returns INT32_MIN when the handle is
 * NULL or fails validate_clnt().
 */
int sensors_manager_read_trip_temp(sensor_clnt_handle clnt)
{
	struct sensor_client_type *client = clnt;
	struct sensors_mgr_sensor_info *owner;
	int reading;

	if (!client) {
		msg("%s: Invalid args.\n", __func__);
		return INT32_MIN;
	}

	if (validate_clnt(client) != 0)
		return INT32_MIN;

	owner = client->sensor_mgr;

	/* No dedicated hook installed: fall back to the plain read path. */
	if (owner->get_trip_temperature == NULL)
		return sensors_manager_read(clnt);

	reading = owner->get_trip_temperature(owner);
	thermalmsg(LOG_LVL_DBG, (LOG_LOGCAT | LOG_LOCAL_SOCKET
			| LOG_TRACE), "%s:%s:%d mC\n", SENSORS,
			owner->name, reading);

	return reading;
}

我们再来看sensor_threshold_trigger函数，正常流程走的是else分支，而且一般没有开启override_mode，因此只要温度大于等于lvl_trig就算触发了。

/*
 * Tell whether a reading trips threshold `level` of a monitor instance.
 *
 * Descending settings trip when value <= lvl_trig. Otherwise the setting
 * trips when value >= lvl_trig, with the setting's override offset added to
 * lvl_trig while override_mode is set.
 *
 * Returns 1 when tripped, 0 otherwise.
 */
static int sensor_threshold_trigger(int value, struct tm_instance_info *sensor, int level)
{
	struct tm_setting *cfg = &(sensor->setting->data.tm);
	int trip;

	if (cfg->descending_thresh)
		return (value <= cfg->t[level].lvl_trig) ? 1 : 0;

	trip = cfg->t[level].lvl_trig;
	if (override_mode)
		trip += cfg->override;

	return (value >= trip) ? 1 : 0;
}

还是分析handle_thresh_sig函数，触发之后最后还是调用了device_clnt_request，每一个setting下面action满足条件的device的client都会申请一个request，device_clnt_request这个函数我们在分析device初始化的时候分析过了。像device的type是DEVICE_OP_VALUE_TYPE类型的值我们会取所有client的最小值（比如cpu），像device的type是DEVICE_GENERIC_TYPE我们会取所有client的最大值，最终会到device的action函数来控制。

/*
 * Submit a request from one device client, dispatched by the device type.
 *
 * Returns -EINVAL for NULL arguments, the validate_clnt() error for an
 * invalid client, the result of the type-specific setter for generic /
 * op-value / direct-action devices, and 0 for any other device type.
 */
int device_clnt_request(device_clnt_handle clnt, union device_request *req)
{
	struct device_clnt *client = clnt;
	struct devices_manager_dev *dev;
	int rc;

	if (!client || !req) {
		msg("%s: Invalid args.\n", __func__);
		return -(EINVAL);
	}

	rc = validate_clnt(client);
	if (rc != 0)
		return rc;

	dev = client->dev_mgr;

	switch (dev->dev_info.dev_type) {
	case DEVICE_GENERIC_TYPE:
		return devices_manager_set_lvl(dev, client, req->value);
	case DEVICE_OP_VALUE_TYPE:
		return devices_manager_set_op_value(dev, client, req->value);
	case DEVICE_DIRECT_ACTION_TYPE:
		return devices_manager_set_direct_action(dev, client, req);
	default:
		/* Other types are only logged; the request is not an error. */
		dbgmsg("%s: Unhandled dev_type %d", __func__,
		       dev->dev_info.dev_type);
		return 0;
	}
}

比如我们拿gpu举例，会调用devices_manager_set_op_value函数

/*
 * Record an operating-value request for one client and re-evaluate the
 * device state.
 *
 * The requested value is capped at the device's max_dev_op_value and, when
 * the device has a level table, replaced by the first table value that is
 * <= the request (or by the last table value when none qualifies).
 *
 * Note: the dev_mgr argument is overwritten with client->dev_mgr.
 *
 * Returns -EINVAL for a negative value, -EFAULT when the device has no valid
 * max op value, otherwise the device's active request value after
 * update_dev_state().
 */
static int devices_manager_set_op_value(struct devices_manager_dev *dev_mgr,
					struct device_clnt *client,
					int dev_op_value)
{
	if (dev_op_value < 0) {
		msg("%s: Invalid args.\n", __func__);
		return -(EINVAL);
	}

	dev_mgr = client->dev_mgr;

	if (dev_mgr->dev_info.max_dev_op_value_valid == 0) {
		msg("%s: dev_op invalid.\n", __func__);
		return -(EFAULT);
	}

	/* Never request more than the device's maximum. */
	dev_op_value = MIN(dev_op_value, dev_mgr->dev_info.max_dev_op_value);

	if (dev_mgr->lvl_info && (dev_mgr->dev_info.num_of_levels > 0)) {
		/* Translate to dev_op_value to supported mitigation value:
		   stop at the first level not above the request, or at the
		   last level (highest mitigation possible) if none is. */
		uint32_t last = dev_mgr->dev_info.num_of_levels - 1U;
		uint32_t pick = 0;

		while ((pick < last) &&
		       (dev_mgr->lvl_info[pick].lvl.value > dev_op_value))
			pick++;

		dev_op_value = dev_mgr->lvl_info[pick].lvl.value;
	}

	/* Publish the client's request under the client mutex. */
	pthread_mutex_lock(&clnt_mtx);
	client->request_active = 1;
	client->request.value = dev_op_value;
	pthread_mutex_unlock(&clnt_mtx);
	dbgmsg("%s: DEV %s, op_value %d\n", __func__, dev_mgr->dev_info.name,
	       dev_op_value);

	update_dev_state(dev_mgr);

	return dev_mgr->active_req.value;
}

update_dev_state函数就是得到类型为DEVICE_OP_VALUE_TYPE的client的最小值，然后去执行device的action操作，value就是这个所有client的最小值。

/*
 * Recompute the effective request of a device from all of its clients and
 * apply it when it changed.
 *
 * DEVICE_GENERIC_TYPE:  the highest active client value wins, starting from
 *                       the device's min_lvl.
 * DEVICE_OP_VALUE_TYPE: the lowest active client value wins, starting from
 *                       the device's max_dev_op_value.
 *
 * When the result differs from the current active request, the device's
 * action hook is invoked and every client callback is notified, all while
 * clnt_mtx is held.
 *
 * Returns 0, or -EFAULT for any other device type.
 */
static int update_dev_state(struct devices_manager_dev *dev_mgr)
{
	union device_request req;
	struct device_clnt *client;

	if ((dev_mgr->dev_info.dev_type != DEVICE_GENERIC_TYPE) &&
	    (dev_mgr->dev_info.dev_type != DEVICE_OP_VALUE_TYPE))
		return -(EFAULT);

	pthread_mutex_lock(&clnt_mtx);

	/* Read the list head only once the mutex is held, so the walk below
	   starts from a head that is consistent with the rest of the list. */
	client = dev_mgr->client_list;

	if (dev_mgr->dev_info.dev_type == DEVICE_GENERIC_TYPE) {
		/* Start from min level to find the highest existing client request */
		req.value = dev_mgr->dev_info.min_lvl;

		/* Walk client list to find highest mitigation level */
		while (client != NULL) {
			if (client->request_active)
				req.value = MAX(req.value, client->request.value);
			client = client->next_clnt;
		}
	} else if (dev_mgr->dev_info.dev_type == DEVICE_OP_VALUE_TYPE) {
		/* Start from max allowable value find lowest request */
		req.value = dev_mgr->dev_info.max_dev_op_value;

		/* Walk client list to find the lowest requested value */
		while (client != NULL) {
			if (client->request_active)
				req.value = MIN(req.value, client->request.value);
			client = client->next_clnt;
		}
	}

	if (dev_mgr->active_req.value != req.value) {
		dev_mgr->active_req.value = req.value;

		/* Apply the new value through the device's action hook. */
		if (dev_mgr->action)
			dev_mgr->action(dev_mgr);

		/* Notify clients */
		client = dev_mgr->client_list;
		while (client != NULL) {
			if (client->cb_func != NULL)
				client->cb_func(client, &req,
						client->cb_usr_data);
			client = client->next_clnt;
		}
	}
	pthread_mutex_unlock(&clnt_mtx);
	return 0;
}

我们继续分析handle_thresh_sig函数，当alarm_cleared而且当lvl_min为0，这个代表一个setting下面的t数组所有的触发都清除了，这个时候我们再调用clear_all_alarms函数。

		if (alarm_cleared) {
			/* Handle alarm clearing cases */
			if (lvl_min == 0) {
				dbgmsg("Clearing all alarms\n");
				clear_all_alarms(sensor);
			}

clear_all_alarms就是把该setting下所有device的client之前向device申请的request都取消。

/*
 * Cancel the outstanding device requests of one monitor instance.
 *
 * Only slots that have a device client and whose bit is set in action_mask
 * are considered; generic and op-value devices get their request cancelled,
 * any other device type is only logged. action_mask is reset afterwards.
 */
static void clear_all_alarms(struct tm_instance_info *sensor)
{
	uint32_t slot;

	for (slot = 0; slot < MAX_ACTIONS_PER_TM_INSTANCE; slot++) {
		int has_client = (sensor->dev_clnt_list[slot] != NULL);
		/* check if action may have been set */
		int was_used = ((sensor->action_mask & (1U << slot)) != 0);

		if (!has_client || !was_used)
			continue;

		if ((sensor->dev_info_list[slot]->dev_type == DEVICE_GENERIC_TYPE) ||
		    (sensor->dev_info_list[slot]->dev_type == DEVICE_OP_VALUE_TYPE)) {
			device_clnt_cancel_request(sensor->dev_clnt_list[slot]);
		} else {
			dbgmsg("%s: No clearing of action %s\n", __func__,
			       sensor->dev_info_list[slot]->name);
		}
	}

	sensor->action_mask = 0;
}

这样monitor大致流程我们知道了。我们再来看下handle_thresh_sig下面这段代码，逆序遍历每个setting下面的t数组如果温度超过了设置的值且没有记录，就记为需要触发，并且我们会记录下lvl_max就是触发的那个最大的t数组的index。同时也会记录被清除的t数组的最小index。然后调用sensor_update_thresholds函数。

		for (i = max_thr - 1; i >= 0; i--) {
			/* Scan for new alarm conditions */
			if (sensor_threshold_trigger(sensor_temp, sensor, i)) {
				if (sensor->lvl_alarm[i] == 0) {
					thermalmsg(LOG_LVL_DBG,
					    (LOG_LOGCAT | LOG_TRACE),
					    "TM Id '%s' Sensor '%s' - alarm "
					    " raised %d at %d.%d degC\n",
					    info->desc, tm_info->sensor, i + 1,
					    RCONV(sensor_temp),
					    (sensor_temp%1000)/100);
					sensor->lvl_alarm[i] = 1;
					alarm_raised = 1;
				}
				if (i > lvl_max)
					lvl_max = i;//记录触发的t数组的最大index
			}
			/* Scan for alarm clearing conditions */
			if (sensor_threshold_clear(sensor_temp, sensor, i)) {
				if (sensor->lvl_alarm[i] == 1) {
					thermalmsg(LOG_LVL_DBG,
					    (LOG_LOGCAT | LOG_TRACE),
					    "TM Id '%s' Sensor '%s' - alarm "
					    "cleared %d at %d.%d degC\n",
					    info->desc, tm_info->sensor, i + 1,
					    RCONV(sensor_temp),
					    (sensor_temp%1000)/100);
					sensor->lvl_alarm[i] = 0;
					alarm_cleared = 1;
				}
				if (i < lvl_min)
					lvl_min = i;//清除就是记录t数组最小的index
			}
		}

		/* Update temperature thresholds */
		if (alarm_raised) {//如果这次是升温超过某个设定值
			threshold_type = THRESHOLD_CROSS;
			threshold_level = lvl_max + 1;//该值为最大值+1
		} else if (alarm_cleared) {//这次是降温，小于某个clr的温度值
			threshold_type = THRESHOLD_CLEAR;
			threshold_level = lvl_min;//该值为最小值。
		} else {
			threshold_type = THRESHOLD_NOCHANGE;
			threshold_level = sensor->last_lvl;
		}
		sensor->last_lvl = threshold_level;

		pthread_mutex_lock(&wait_mutex);
		/* Unmask TM instance as handled */
		THRESH_MASK_CLR(idx);
		pthread_mutex_unlock(&wait_mutex);

		sensor_update_thresholds(sensor, threshold_type,
					 threshold_level, idx);

我们再来看sensor_update_thresholds函数，不得不说上面的设计挺巧妙的，我们现在无非两种情况：

1. 如果现在是升温超过某个设定值，那么下一个要监控的升温温度（high），就是t数组中当前已触发的最大index再加1那一档的lvl_trig；而要监控的降温温度（low），就是当前已触发的最大index那一档的lvl_clr。sensor监控到温度满足这两个值中的任意一个时，就会再次触发monitor算法的流程。

2. 如果现在是降温低于某个lvl_clr值了，那么下一个要监控的升温温度（high）就是被清除的最小index那一档的lvl_trig，而下一个要监控的降温温度（low）就是该最小index减1那一档的lvl_clr（如果最小index已经是0，则low不再有效，即low_valid为0）。

现在把下一次sensor要检测的low和high记录好了，就可以往下调用sensors_manager_set_thresh_lvl函数。

/*
 * Program the next high/low thresholds of one monitor instance on its
 * sensor client.
 *
 * setting:        monitor instance; it and its ts_clnt must be non-NULL.
 * threshold_type: not read in the code shown here.
 * level:          index into t[] used to pick the next thresholds.
 * idx:            instance index, handed back to sensor_thresh_notify() as
 *                 the callback data.
 *
 * For ascending settings: high = t[level].lvl_trig and
 * low = t[level - 1].lvl_clr. When level is past the last threshold the
 * high side is marked invalid; when level <= 0 the low side is marked
 * invalid. In override_mode the setting's override offset is added to both.
 */
static void sensor_update_thresholds(struct tm_instance_info *setting,
				     int threshold_type, int level,
				     uint32_t idx)
{
	struct sensor_thresh_req ts_thresh_req;
	struct thresholds_req_t *thresh = &ts_thresh_req.thresh;
	struct tm_setting *tm_info = NULL;

	if (setting == NULL  ||
	    setting->ts_clnt == NULL) {
		msg("%s: Unexpected NULL", __func__);
		return;
	}

	tm_info = &(setting->setting->data.tm);
	memset(&ts_thresh_req, 0, sizeof(struct sensor_thresh_req));

	/* Callback and polling period requested from the sensor manager. */
	ts_thresh_req.notify_cb_func = sensor_thresh_notify;
	ts_thresh_req.notify_cb_data = (void *)(uintptr_t)idx;
	ts_thresh_req.polling_interval_valid = 1;
	ts_thresh_req.polling_interval = tm_info->sampling_period_ms;
	/* Both sides start out valid; the corner cases below clear one of them. */
	thresh->high_valid = 1;
	thresh->low_valid = 1;

	/* The descending branch is elided in this excerpt. */
	if (tm_info->descending_thresh) {
		.......
	} else {
		thresh->descending_threshold = 0;
		/* Rising trigger */
		if ((uint32_t)level >= tm_info->num_thresholds) {
			/* handle corner high case */
			thresh->high = tm_info->t[tm_info->num_thresholds - 1].lvl_trig;
			thresh->high_valid = 0;
		} else
			thresh->high = tm_info->t[level].lvl_trig;

		if (level <= 0) {
			/* handle corner low case */
			thresh->low = tm_info->t[0].lvl_clr;
			thresh->low_valid = 0;
		} else
			thresh->low = tm_info->t[level - 1].lvl_clr;

		/* Shift both thresholds by the override offset when active. */
		if (override_mode) {
			thresh->high += tm_info->override;
			thresh->low += tm_info->override;
		}
	}
	sensors_manager_set_thresh_lvl(setting->ts_clnt, &ts_thresh_req);
}

sensors_manager_set_thresh_lvl函数主要就是把thresh_info放入client的request，然后将request_active的标志位置1，然后调用update_active_thresh函数。

/*
 * Set or clear the threshold request of one sensor client, then recompute
 * the sensor's active thresholds.
 *
 * thresh_info == NULL clears the client's request. Otherwise the request is
 * copied into the client and marked active, but only if it passes
 * validate_thresh_info(); an invalid request is ignored.
 *
 * Returns -EINVAL for a NULL handle, the validate_clnt() error for an
 * invalid client, 0 otherwise.
 */
int sensors_manager_set_thresh_lvl(sensor_clnt_handle clnt,
		       struct sensor_thresh_req *thresh_info)
{
	struct sensor_client_type *client = clnt;
	struct sensors_mgr_sensor_info *owner;
	int rc;

	if (!client) {
		msg("%s: Invalid args.\n", __func__);
		return -(EINVAL);
	}

	rc = validate_clnt(client);
	if (rc != 0)
		return rc;

	owner = client->sensor_mgr;

	THERM_MUTEX_LOCK(&ts_clnt_mtx);
	if (thresh_info != NULL) {
		if (validate_thresh_info(thresh_info) == 0) {
			memcpy(&client->request, thresh_info,
			       sizeof(struct sensor_thresh_req));
			/* Mark the request as pending for this client. */
			client->request_active = 1;
		}
	} else {
		/* Clear client request */
		client->request_active = 0;
		dbgmsg("%s: %s clear request.\n", __func__, owner->name);
	}
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);

	/* Update thresholds. */
	update_active_thresh(owner);
	return 0;
}

update_active_thresh函数就是去更新下sensor的thresh的值。

/*
 * Recompute a sensor's active thresholds from all of its clients' active
 * requests.
 *
 * The active high is the lowest valid client high, the active low is the
 * highest valid client low, and the polling interval is the shortest valid
 * client interval. If the result is usable (high > low and at least one side
 * valid) and the sensor has an update_thresholds hook, the hook is called.
 * When the sensor goes from "no request" to "has a request", req_wait_cond
 * is broadcast.
 *
 * Returns 0, or -EINVAL when sensor_mgr is NULL.
 */
static int update_active_thresh(struct sensors_mgr_sensor_info *sensor_mgr)
{
	struct sensor_client_type  *client = NULL;
	struct sensor_thresh_req *active = NULL;
	uint8_t                active_req = 0;

	if (sensor_mgr == NULL)
		return -(EINVAL);

	/* Take the client mutex before resetting the shared active_thresh and
	   before reading the list head, so both are covered by the same lock
	   that protects the walk below. */
	THERM_MUTEX_LOCK(&ts_clnt_mtx);

	active = &sensor_mgr->active_thresh;

	memset(active, 0, sizeof(struct sensor_thresh_req));
	active->thresh.low = INT32_MIN;
	active->thresh.high  = INT32_MAX;
	active->polling_interval = UINT32_MAX;

	client = sensor_mgr->client_list;
	while (client != NULL) {
		/* Clients without a pending request do not contribute. */
		if (!client->request_active) {
			client = client->next_clnt;
			continue;
		}

		struct sensor_thresh_req *thresh = &client->request;
		if (thresh->thresh.descending_threshold)
			active->thresh.descending_threshold = 1;
		/* Find active high */
		if (thresh->thresh.high_valid) {
			active->thresh.high_valid = 1;
			active->thresh.high = MIN(active->thresh.high,
							thresh->thresh.high);
		}

		/* Find active low */
		if (thresh->thresh.low_valid) {
			active->thresh.low_valid = 1;
			active->thresh.low = MAX(active->thresh.low,
						       thresh->thresh.low);
		}

		/* Find min polling interval */
		if (thresh->polling_interval_valid) {
			active->polling_interval_valid = 1;
			active->polling_interval = MIN(active->polling_interval,
							     thresh->polling_interval);
		}

		active_req = 1;
		client = client->next_clnt;
	}

	if ((active->thresh.high > active->thresh.low) &&
	    (active->thresh.high_valid || active->thresh.low_valid)) {
		/* We can take advantage of interrupt */
		sensor_mgr->active_thresh_valid = 1;
	} else {
		sensor_mgr->active_thresh_valid = 0;
	}

	/* Room for optimization if thresholds didn't change. */
	if (sensor_mgr->active_thresh_valid &&
	    sensor_mgr->update_thresholds) {
		sensor_mgr->update_thresholds(sensor_mgr);
	}

	if (!sensor_mgr->req_active && active_req) {
		/* Advertise there is now an active request available */
		pthread_mutex_lock(&(sensor_mgr->req_wait_mutex));
		sensor_mgr->req_active = 1;
		pthread_cond_broadcast(&(sensor_mgr->req_wait_cond));
		pthread_mutex_unlock(&(sensor_mgr->req_wait_mutex));
	} else {
		sensor_mgr->req_active = active_req;
	}
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);
	return 0;
}

好到这里monitor算法的函数分析完了。

但是一般当我们第一次调用handle_thresh_sig函数的时候，温度通常不会超过设定的温度值，这个时候其实根本没有调频。我们再去看sensor_monitor函数的主循环，这个时候线程不能往下执行，因为它在wait condition。

	while (exit_daemon != 1) {
		dbgmsg("%s: Wait for EV", __func__);
		pthread_mutex_lock(&wait_mutex);
		if (!THRESH_MASK_ANY_SET) {
			pthread_cond_wait(&wait_cond, &wait_mutex);
		}
		pthread_mutex_unlock(&wait_mutex);

		dbgmsg("%s: Thresh EVT", __func__);
		handle_thresh_sig();
	}

4. sensor监控

这个时候就要看sensor的sensor_monitor函数，之前分析sensor的博客我们分析过每一个sensor都会起一个thread执行sensor_monitor函数来监控每个sensor的温度。

/*
 * Per-sensor monitoring thread.
 *
 * vsensor_mgr is the sensor's struct sensors_mgr_sensor_info. Until
 * thread_shutdown becomes 1 the loop: blocks while the sensor has no active
 * request, calls sensor_wait(), samples the temperature into last_reading
 * and lets notify_clnts() inform the clients. Always returns NULL.
 */
static void *sensor_monitor(void *vsensor_mgr)
{
	struct sensors_mgr_sensor_info *sensor_mgr = vsensor_mgr;

	while (sensor_mgr->thread_shutdown != 1) {
		/* Wait here until there is actually a request to process */
		/* req_active is re-checked under req_wait_mutex before sleeping. */
		if (!sensor_mgr->req_active) {
			dbgmsg("%s: %s Wait for client request.\n", __func__, sensor_mgr->name);
			pthread_mutex_lock(&(sensor_mgr->req_wait_mutex));
			while (!sensor_mgr->req_active) {
				pthread_cond_wait(&(sensor_mgr->req_wait_cond),
						&(sensor_mgr->req_wait_mutex));
			}
			pthread_mutex_unlock(&(sensor_mgr->req_wait_mutex));
		}
		dbgmsg("%s: %s Sensor wait.\n", __func__, sensor_mgr->name);
		sensor_wait(sensor_mgr);

		/* Prefer the trip-temperature hook when the sensor provides one;
		   either way the sample is stored in last_reading. */
		if (sensor_mgr->get_trip_temperature)
			sensor_mgr->last_reading =
				sensor_mgr->get_trip_temperature(sensor_mgr);
		else
			sensor_mgr->last_reading =
				sensor_mgr->get_temperature(sensor_mgr);

		notify_clnts(sensor_mgr);
	}

	return NULL;
}

我们再来看notify_clnts函数，这个时候我们还是遍历client，看每个client是否有申请。只有sensor最近一次读取的温度大于等于high，或者小于等于low，才会调用client的回调函数。这样做的好处很明显：核心的算法线程不用一直跑，只有温度超过high或者低于low时才会重新进入算法的流程。

/*
 * Notify the clients of a sensor whose thresholds were crossed by the last
 * reading.
 *
 * For every client with an active request: a valid high with
 * last_reading >= high raises a HIGH event, otherwise a valid low with
 * last_reading <= low raises a LOW event. A client that gets an event has
 * its request deactivated before its callback runs (the callback is invoked
 * with ts_clnt_mtx held). Afterwards the sensor's active thresholds are
 * recomputed.
 *
 * Returns 0, or -EINVAL when sensor_mgr is NULL.
 */
static int notify_clnts(struct sensors_mgr_sensor_info *sensor_mgr)
{
	struct sensor_client_type *client = NULL;
	enum sensor_notify_event_type thresh_event;

	if (sensor_mgr == NULL)
		return -(EINVAL);

	THERM_MUTEX_LOCK(&ts_clnt_mtx);
	/* Read the list head only once the mutex is held, so the walk starts
	   from a head that is consistent with the rest of the list. */
	client = sensor_mgr->client_list;
	while (client != NULL) {
		if (client->request_active) {
			struct thresholds_req_t *thresh = &client->request.thresh;

			/* Notify clients of thresh crossings */
			thresh_event = SENSOR_NOTIFY_NORMAL_THRESH_EVENT;
			if (thresh->high_valid &&
			    (sensor_mgr->last_reading >= thresh->high)) {
				thresh_event = SENSOR_NOTIFY_HIGH_THRESH_EVENT;
			} else if (thresh->low_valid &&
				   (sensor_mgr->last_reading <= thresh->low)) {
				thresh_event = SENSOR_NOTIFY_LOW_THRESH_EVENT;
			}

			if (thresh_event != SENSOR_NOTIFY_NORMAL_THRESH_EVENT) {
				/* One-shot: the client must submit a new request
				   to be notified again. */
				client->request_active = 0;
				client->request.notify_cb_func(client,
							       thresh_event,
							       sensor_mgr->last_reading,
							       client->request.notify_cb_data);
			}
		}
		client = client->next_clnt;
	}
	THERM_MUTEX_UNLOCK(&ts_clnt_mtx);
	/* Recompute the sensor's thresholds from the remaining requests. */
	update_active_thresh(sensor_mgr);
	return 0;
}

然后我们再来看Monitor算法sensor_thresh_notify函数，就是会去broadcast condition。

/*
 * Sensor-manager callback of the monitor algorithm.
 *
 * clnt:    sensor client that crossed a threshold (must be non-NULL).
 * event:   kind of crossing; not used here.
 * reading: temperature that caused the notification (logged only).
 * data:    index of the monitor instance in tm_states[], carried as a
 *          pointer-sized integer.
 *
 * Marks the instance as pending in the threshold mask and broadcasts
 * wait_cond under wait_mutex. Out-of-range indices are logged and ignored.
 */
static void sensor_thresh_notify(sensor_clnt_handle  clnt,
			   enum sensor_notify_event_type   event,
			   int                    reading,
			   void                  *data)
{
	uintptr_t idx = (uintptr_t)data;

	if (NULL == clnt) {
		msg("%s: unexpected NULL", __func__);
		return;
	}

	if (idx >= tm_cnt) {
		/* uintptr_t has no dedicated printf length modifier without
		   <inttypes.h>; print it through an explicit unsigned long
		   cast so the conversion matches the argument type. */
		msg("%s: unexpected idx %lu", __func__, (unsigned long)idx);
		return;
	}

	dbgmsg("%s: Update recieved %s %d", __func__,
	       tm_states[idx].setting->desc,
	       reading);

	/* Notify the waiting thread */
	pthread_mutex_lock(&wait_mutex);
	THRESH_MASK_SET(idx);
	pthread_cond_broadcast(&wait_cond);
	pthread_mutex_unlock(&wait_mutex);
}

这个时候sensor_monitor函数才会继续执行。

	while (exit_daemon != 1) {
		dbgmsg("%s: Wait for EV", __func__);
		pthread_mutex_lock(&wait_mutex);
		if (!THRESH_MASK_ANY_SET) {
			pthread_cond_wait(&wait_cond, &wait_mutex);
		}
		pthread_mutex_unlock(&wait_mutex);

		dbgmsg("%s: Thresh EVT", __func__);
		handle_thresh_sig();
	}

5. 总结

Monitor算法是一种静态算法，sensor的sensor_monitor会去不断检测这个sensor的温度，当大于每个setting设置的high的温度或者小于low的温度，就会去启动monitor算法流程。

monitor的算法配置中，当t数组中某一项的lvl_trig温度满足时，该项下面所有action中的device都会执行（每个device的client都会申请，最后由device执行action函数）；而只有当sensor温度小于t数组中最小一档的lvl_clr时，该setting下所有device的client的申请才会被取消，然后重新统计每个device的所有client的最大或最小值（根据device的type不同），再调用device的action函数。

{
		.desc = "CAMERA_CAMCORDER_MONITOR",
		.algo_type = MONITOR_ALGO_TYPE,
		.data.tm = {
			.sensor = "tsens_tz_sensor1",
			.sampling_period_ms = 250,
			.num_thresholds = 3,
			._n_thresholds = 3,
			._n_to_clear = 3,
			._n_actions = 3,
			._n_action_info = 3,
			.t[0] = {
				.lvl_trig = 80000,
				.lvl_clr = 75000,
				.num_actions = 2,
				.actions[0] = {
					.device = "camera",
					.info = 1,
				},
				.actions[1] = {
					.device = "camcorder",
					.info = 1,
				},
			},
			.t[1] = {
				.lvl_trig = 85000,
				.lvl_clr = 80000,
				.num_actions = 2,
				.actions[0] = {
					.device = "camera",
					.info = 2,
				},
				.actions[1] = {
					.device = "camcorder",
					.info = 2,
				},
			},
			.t[2] = {
				.lvl_trig = 88000,
				.lvl_clr = 85000,
				.num_actions = 2,
				.actions[0] = {
					.device = "camera",
					.info = 10,
				},
				.actions[1] = {
					.device = "camcorder",
					.info = 10,
				},
			}
		},

温控daemon（六）Monitor算法

1. thermal_monitor函数

2. sensor_setup

3. Monitor监控

4. sensor监控

5. 总结

相关阅读

相关文章

相关问答

相关文档