linux 内核 ida机制分析
在描述ida之前,需要读者对linux 内核的radix-tree有一定的了解。关于radix-tree(基数树)的描述,作者在之前有专门的文章对其进行描述,这里不再赘述。
IDA是什么?内核文档中解释如下:
/**
 * IDA description
 *
 * The IDA is an ID allocator which does not provide the ability to
 * associate an ID with a pointer.  As such, it only needs to store one
 * bit per ID, and so is more space efficient than an IDR.  To use an IDA,
 * define it using DEFINE_IDA() (or embed a &struct ida in a data structure,
 * then initialise it using ida_init()).  To allocate a new ID, call
 * ida_simple_get().  To free an ID, call ida_simple_remove().
 *
 * If you have more complex locking requirements, use a loop around
 * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
 * ida_remove() to free an ID.  You must make sure that ida_get_new() and
 * ida_remove() cannot be called at the same time as each other for the
 * same IDA.
 *
 * You can also use ida_get_new_above() if you need an ID to be allocated
 * above a particular number.  ida_destroy() can be used to dispose of an
 * IDA without needing to free the individual IDs in it.  You can use
 * ida_is_empty() to find out whether the IDA has any IDs currently allocated.
 *
 * IDs are currently limited to the range [0-INT_MAX].  If this is an awkward
 * limitation, it should be quite straightforward to raise the maximum.
 */
ida只处理id的分配,不关注与id关联的item。shift=0层radix_tree_node节点上的slots[]无需存储item,因此空闲了出来;ida在这些slots[]上存放bitmap,用来标记bitmap内各id的分配情况。这样整个ida的radix-tree的深度就缩减了。
bitmap长度为 IDA_BITMAP_BITS(1024 bit,即 IDA_CHUNK_SIZE 128字节 × 8),id去除bitmap标记的位后,index为id/IDA_BITMAP_BITS。
以index作为起始下标,在[index, IDA_MAX)范围内于radix-tree中查找可用的slot;分配完成后将bitmap中第 id % IDA_BITMAP_BITS 位置位,表示该id已被分配。
idr或者ida是在[start,end)范围内分配id,根据radix-tree中id分配情况,最终分配出来的id可能>=start.
1、数据结构
/*
 * IDA - IDR based id allocator, use when translation from id to pointer
 * isn't necessary.
 */
#define IDA_CHUNK_SIZE		128	/* 128 bytes per chunk */
/* Number of unsigned longs in one chunk: 128 / 8 = 16 with a 64-bit long. */
#define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long))
/* Number of ID bits in one chunk: 16 * 8 * 8 = 1024 (i.e. IDA_CHUNK_SIZE * 8). */
#define IDA_BITMAP_BITS		(IDA_BITMAP_LONGS * sizeof(long) * 8)

/*
 * The IDA is even shorter since it uses a bitmap at the last level.
 */
/* 8 * 4 - 1 - ilog2(1024) = 32 - 1 - 10 = 21 index bits, with a 4-byte int. */
#define IDA_INDEX_BITS		(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
/*
 * DIV_ROUND_UP(21, 6) = 4, taking RADIX_TREE_MAP_SHIFT as 6
 * (its definition is not shown here -- confirm against radix-tree.h).
 */
#define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, RADIX_TREE_MAP_SHIFT))
/* 4 * 2 - 1 = 7 */
#define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)
/*
 * An IDA radix tree is therefore at most 4 levels deep; 4 levels * 6 bits
 * = 24 bits, which covers the 21 index bits.
 */
/* 0x80000000 / 1024 = 0x200000 (2,097,152) chunk indices. */
#define IDA_MAX			(0x80000000U / IDA_BITMAP_BITS)

/* One chunk of ID bits; a slot of the IDA radix tree may point at one. */
struct ida_bitmap {
	unsigned long		bitmap[IDA_BITMAP_LONGS];	/* bitmap[16] with a 64-bit long */
};

/* Per-CPU spare bitmap: filled by ida_pre_get(), taken by ida_get_new_above(). */
DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);

struct ida {
	struct radix_tree_root	ida_rt;
};
2、接口说明
2.1、ida初始化
/* Define and statically initialise an ida. */
#define DEFINE_IDA(name)	struct ida name = IDA_INIT

/* Static initialiser for a struct ida; each directive needs its own line. */
#define IDA_INIT	{						\
	.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),		\
}

/*
 * ida_init - initialise an ida at run time
 * @ida: the ida to initialise (e.g. one embedded in another structure)
 *
 * Sets up the embedded radix tree root with the same flags that
 * IDA_INIT uses for static initialisation.
 */
static inline void ida_init(struct ida *ida)
{
	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
}
2.2、ida分配id(ida_pre_get/ida_get_new/ida_get_new_above)
/**
 * ida_pre_get - reserve resources for ida allocation
 * @ida: ida handle
 * @gfp: memory allocation flags
 *
 * This function should be called before calling ida_get_new_above().  If it
 * is unable to allocate memory, it will return %0.  On success, it returns %1.
 */
int ida_pre_get(struct ida *ida, gfp_t gfp)
{
	/*
	 * The IDA API has no preload_end() equivalent.  Instead,
	 * ida_get_new() can return -EAGAIN, prompting the caller
	 * to return to the ida_pre_get() step.
	 */
	/*
	 * Preload IDA_PRELOAD_SIZE (7) radix tree nodes.
	 * NOTE(review): presumably a successful preload returns with
	 * preemption disabled, hence the immediate preempt_enable() --
	 * confirm against lib/radix-tree.c.
	 */
	if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
		preempt_enable();

	if (!this_cpu_read(ida_bitmap)) {
		/* This CPU has no spare bitmap yet: allocate one. */
		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
		if (!bitmap)
			return 0;
		/*
		 * Install it only if the per-CPU pointer is still NULL;
		 * if a bitmap got installed meanwhile, free ours.
		 */
		if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
			kfree(bitmap);
	}

	return 1;
}
注意:
1)、ida_get_new从0开始分配id,ida_get_new_above从传入的参数start开始分配id。
2)、ida_get_new_above在从void *切换到bitmap时,依赖于ida_pre_get为ida_bitmap分配的空间。
/**
 * ida_get_new - allocate new ID
 * @ida: ida handle
 * @p_id: pointer to the allocated handle
 *
 * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
 */
static inline int ida_get_new(struct ida *ida, int *p_id)
{
	/* Plain allocation is ida_get_new_above() searching from ID 0. */
	const int first_id = 0;

	return ida_get_new_above(ida, first_id, p_id);
}

/**
 * ida_get_new_above - allocate new ID above or equal to a start id
 * @ida: ida handle
 * @start: id to start search at
 * @id: pointer to the allocated handle
 *
 * Allocate new ID above or equal to @start.  It should be called
 * with any required locks to ensure that concurrent calls to
 * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
 * Consider using ida_simple_get() if you do not have complex locking
 * requirements.
 *
 * If memory is required, it will return %-EAGAIN, you should unlock
 * and go back to the ida_pre_get() call.  If the ida is full, it will
 * return %-ENOSPC.  On success, it will return 0.
 *
 * @id returns a value in the range @start ... %0x7fffffff.
 */
int ida_get_new_above(struct ida *ida, int start, int *id)
{struct radix_tree_root *root = &ida->ida_rt;void __rcu **slot; /*二级指针*/struct radix_tree_iter iter;struct ida_bitmap *bitmap;unsigned long index;unsigned bit, ebit;int new;index = start / IDA_BITMAP_BITS;bit = start % IDA_BITMAP_BITS;/*为什么 + RADIX_TREE_EXCEPTIONAL_SHIFT???,因为ebit<IDA_BITMAP_BITS时 slots[]中存放的是一个void *的数据,为了和slots[]中存放bitmap进行区分,void *数据被设置了 RADIX_TREE_EXCEPTIONAL_ENTRY(2),导致void*的低2bit无法使用,所以最开始 + RADIX_TREE_EXCEPTIONAL_SHIFT*/ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;slot = radix_tree_iter_init(&iter, index);for (;;) {if (slot) /*后面的循环会使用该条件,slot有效,当期slots[]无空闲数据,查找相邻slots[]是否有空闲数据*/slot = radix_tree_next_slot(slot, &iter,RADIX_TREE_ITER_TAGGED);if (!slot) {/*在[index,IDA_MAX)之间分配数据,分配出来的数据在iter->index更新,返回 &slots[offset],注意slot是二级指针*/slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);if (IS_ERR(slot)) {if (slot == ERR_PTR(-ENOMEM))return -EAGAIN;return PTR_ERR(slot);}}if (iter.index > index) { /*分配出来的iter.index > index */bit = 0;ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;}new = iter.index * IDA_BITMAP_BITS; /*数据还原*/bitmap = rcu_dereference_raw(*slot); /*在ida中slots[]中存放bitmap*/if (radix_tree_exception(bitmap)) { /*4字节未对齐,slots[]中存放的不是bitmap而是使用的void *数据*/unsigned long tmp = (unsigned long)bitmap;/*从ebit开始找zero bit位*/ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);if (ebit < BITS_PER_LONG) {tmp |= 1UL << ebit; /*将这位置1*/rcu_assign_pointer(*slot, (void *)tmp); /*将数据重新写回slots[]中*/*id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT;return 0;}//ebit >= BITS_PER_LONG,将ida_bitmap申请的地址赋值给bitmap,ida_bitmap赋值为NULLbitmap = this_cpu_xchg(ida_bitmap, NULL);if (!bitmap)return -EAGAIN;/*将bitmap赋值给*slot,用bitmap替换到之前的unsigned long数据,会去掉最低2bit的 RADIX_TREE_EXCEPTIONAL_ENTRY标志*/memset(bitmap, 0, sizeof(*bitmap));bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; /*会去掉最低2bit的 RADIX_TREE_EXCEPTIONAL_ENTRY标志*/rcu_assign_pointer(*slot, bitmap);}/*bitma有效,slots[]中存放的是bitmap地址*/if (bitmap) {/*从bitmap中寻找空闲位*/bit = find_next_zero_bit(bitmap->bitmap,IDA_BITMAP_BITS/*128*/, 
bit);new += bit;if (new < 0)return -ENOSPC;if (bit == IDA_BITMAP_BITS)continue; /*当期slots[]无空闲数据,查找相邻slots[]是否有空闲数据*/__set_bit(bit, bitmap->bitmap); /*设置当前bit 位*/if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))radix_tree_iter_tag_clear(root, &iter,IDR_FREE); /*当前节点bitmap内无任何空闲位,清除当前节点tags[IDR_FREE],表示该node内无空闲*/} else { /*bitmap无效,说明当前slots[]还未被使用,可分配*/new += bit;if (new < 0)return -ENOSPC;if (ebit < BITS_PER_LONG) {/*直接存放(void *)数据到slots[]中去,但是标记为RADIX_TREE_EXCEPTIONAL_ENTRY 异常entry和bitmap进行区分通过上面的代码可知,只有void *数据内无空闲位或ebit超过BITS_PER_LONG(64)时才会用bitmap来替换void *数据*/bitmap = (void *)((1UL << ebit) | RADIX_TREE_EXCEPTIONAL_ENTRY);radix_tree_iter_replace(root, &iter, slot, bitmap);*id = new;return 0;}/*ebit >= BITS_PER_LONG*/bitmap = this_cpu_xchg(ida_bitmap, NULL);if (!bitmap)return -EAGAIN;/*使用bitmap而非void *数据*/memset(bitmap, 0, sizeof(*bitmap));__set_bit(bit, bitmap->bitmap);radix_tree_iter_replace(root, &iter, slot, bitmap);}*id = new;return 0;}
}
2.3、ida删除id(ida_remove)
/*** ida_remove - Free the given ID* @ida: ida handle* @id: ID to free** This function should not be called at the same time as ida_get_new_above().*/
void ida_remove(struct ida *ida, int id)
{unsigned long index = id / IDA_BITMAP_BITS;unsigned offset = id % IDA_BITMAP_BITS;struct ida_bitmap *bitmap;unsigned long *btmp;struct radix_tree_iter iter;void __rcu **slot;/*返回index对应的 &slots[offset]地址并且设置iter*/slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);if (!slot)goto err;bitmap = rcu_dereference_raw(*slot);if (radix_tree_exception(bitmap)) { /*void *数据*/btmp = (unsigned long *)slot;offset += RADIX_TREE_EXCEPTIONAL_SHIFT; /if (offset >= BITS_PER_LONG)goto err;} else { /*bitmap数据*/btmp = bitmap->bitmap;}if (!test_bit(offset, btmp))goto err;__clear_bit(offset, btmp);radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);/*删除id后,检查是否能够删除整个node*/if (radix_tree_exception(bitmap)) {if (rcu_dereference_raw(*slot) == (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) /*无任何被使用,删除node*/radix_tree_iter_delete(&ida->ida_rt, &iter, slot);} else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {kfree(bitmap);radix_tree_iter_delete(&ida->ida_rt, &iter, slot);}return;err:WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
}
2.4、ida分配id (ida_simple_get)
ida_simple_get内部先调用ida_pre_get预留资源(此时尚未加锁),然后在持有simple_ida_lock自旋锁并关闭中断的条件下调用ida_get_new_above来进行id分配;若返回-EAGAIN则重新从ida_pre_get开始。
/**
 * ida_simple_get - allocate an ID within a range, doing its own locking
 * @ida: the (initialized) ida.
 * @start: lowest acceptable id (inclusive); must not be negative as an int
 * @end: one past the highest acceptable id (exclusive), or 0 for no upper bound
 * @gfp_mask: memory allocation flags, handed to ida_pre_get()
 *
 * Returns an id with start <= id < end, -ENOSPC if the id found lies
 * beyond the range (or ida_get_new_above() reports no space), or -ENOMEM
 * if ida_pre_get() fails.
 *
 * Unlike ida_get_new_above(), this takes simple_ida_lock itself and retries
 * on -EAGAIN, so prefer it unless there are special requirements.
 *
 * Release the id with ida_simple_remove().
 */
int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
		   gfp_t gfp_mask)
{
	unsigned long irq_state;
	unsigned int limit;
	int status, new_id;

	BUG_ON((int)start < 0);
	BUG_ON((int)end < 0);

	if (end != 0) {
		BUG_ON(end < start);
		limit = end - 1;
	} else {
		limit = 0x80000000;
	}

	do {
		/* Reserve memory before taking the lock. */
		if (!ida_pre_get(ida, gfp_mask))
			return -ENOMEM;

		/* Take the lock with interrupts disabled. */
		spin_lock_irqsave(&simple_ida_lock, irq_state);
		status = ida_get_new_above(ida, start, &new_id);
		if (status == 0) {
			if (new_id > limit) {
				/* Found an id, but past the range: give it back. */
				ida_remove(ida, new_id);
				status = -ENOSPC;
			} else {
				status = new_id;
			}
		}
		/* Drop the lock and restore the interrupt state. */
		spin_unlock_irqrestore(&simple_ida_lock, irq_state);
	} while (unlikely(status == -EAGAIN));

	return status;
}
2.5、ida释放id(ida_simple_remove)
ida_simple_remove内部是在持有simple_ida_lock自旋锁并关闭中断的条件下调用ida_remove来释放id,释放后解锁并恢复中断状态。
/**
 * ida_simple_remove - release an id obtained from ida_simple_get()
 * @ida: the (initialized) ida.
 * @id: the id returned by ida_simple_get.
 *
 * Wraps ida_remove() in simple_ida_lock, so unlike ida_remove() it needs
 * no locking from the caller; use it unless there are special requirements.
 */
void ida_simple_remove(struct ida *ida, unsigned int id)
{
	unsigned long irq_state;

	BUG_ON((int)id < 0);

	/* Take the lock with interrupts disabled. */
	spin_lock_irqsave(&simple_ida_lock, irq_state);
	ida_remove(ida, id);
	/* Drop the lock and restore the interrupt state. */
	spin_unlock_irqrestore(&simple_ida_lock, irq_state);
}
2.6、注销ida(ida_destroy)
/*** ida_destroy - Free the contents of an ida* @ida: ida handle** Calling this function releases all resources associated with an IDA. When* this call returns, the IDA is empty and can be reused or freed. The caller* should not allow ida_remove() or ida_get_new_above() to be called at the* same time.*/
void ida_destroy(struct ida *ida)
{struct radix_tree_iter iter;void __rcu **slot;/*遍历radix-tree slots[]*/radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);if (!radix_tree_exception(bitmap)) /*bitmap*/kfree(bitmap);radix_tree_iter_delete(&ida->ida_rt, &iter, slot);}
}
3、示例
ida分配数据后radix-tree形态如下: