Swift 标准库阅读笔记 - Dictionary

2019-06-02

Dictionary-Swift

Overview

Dictionary 使用两种存储策略: 本地存储(Native Storage) 和 Cocoa 存储方式,这次的分析主要是 native 的部分。

Dictionary 内部只有一个成员变量_variant, 类型是_Varient

1
internal var _varient: _Varient

关于 _Variant 的定义在文件 DictionaryVarient 中,

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
extension Dictionary {
internal struct _Variant {
internal var object: _BridgeStorage<__RawDictionaryStorage>

init(native: __owned _NativeDictionary<Key, Value>) {
self.object = _BridgeStorage(native: native._storage)
}

// 存储不需要桥接的原生类型
internal var asNative: _NativeDictionary<Key, Value> {
get {
return _NativeDictionary<Key, Value>(object.unflaggedNativeInstance)
}
set {
self = .init(native: newValue)
}
}
// 存储需要桥接的类型
internal var asCocoa: __CocoaDictionary {
return __CocoaDictionary(object.objCInstance)
}
}
}

同时, _Variant 也实现了 _DictionaryBuffer 接口, 用于在检查每个缓冲类型(asNativeasCocoa)都实现了需要的操作。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
internal protocol _DictionaryBuffer {
associatedtype Key
associatedtype Value
associatedtype Index

var startIndex: Index { get }
var endIndex: Index { get }
func index(after i: Index) -> Index
func index(forKey key: Key) -> Index?
var count: Int { get }

func contains(_ key: Key) -> Bool
func lookup(_ key: Key) -> Value?
func lookup(_ index: Index) -> (key: Key, value: Value)
func key(at index: Index) -> Key
func value(at index: Index) -> Value
}

而用于存储不需要桥接类型的 _NativeDictionary__RawDictionaryStorage 的包装类, 内部只有一个 __RawDictionaryStorage 类型的变量 _storage

它还提供了一些方法将实际上有三个连续数组组成的字典内存转换成逻辑上的bucket数组。而且,这个结构体将bucket数组中的第一个bucket和最后一个bucket在逻辑上链接起来,从而形成了一个bucket环,也就是说当你到达bucket数组的末尾并且调用next方法时,你又会回到bucket数组的开头。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
internal struct _NativeDictionary<Key: Hashable, Value> {
internal typealias Element = (key: Key, value: Value)

/// See this comments on __RawDictionaryStorage and its subclasses to
/// understand why we store an untyped storage here.
internal var _storage: __RawDictionaryStorage

/// Constructs an instance from the empty singleton.
internal init() {
self._storage = __RawDictionaryStorage.empty
}

/// Constructs a dictionary adopting the given storage.
internal init(_ storage: __owned __RawDictionaryStorage) {
self._storage = storage
}

internal var _keys: UnsafeMutablePointer<Key> {
return _storage._rawKeys.assumingMemoryBound(to: Key.self)
}

internal var _values: UnsafeMutablePointer<Value> {
return _storage._rawValues.assumingMemoryBound(to: Value.self)
}

_BridgeStorage 实际上区分 class@objc 两种可能性,从而提供不同的存储策略,为上层调用提供不同的接口以及类型判断。 通过 BuiltIn.BridgeObject 这个中间参数实现不同的存储策略。

结构体分配在栈上,而 __RawDictionaryStorage 相关的变量分配在堆里, 包括一些关于 Dictionary 的基本的属性,如count, capacity等。 在初始化 __RawDictionaryStorage 时,会在基本属性后分配真正存储字典键值的数组,_rawKeys_rawValues 指向存储这些元素的地址。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
static internal func allocate(
scale: Int8,
age: Int32?,
seed: Int?
) -> _DictionaryStorage {
// The entry count must be representable by an Int value; hence the scale's
// peculiar upper bound.
_internalInvariant(scale >= 0 && scale < Int.bitWidth - 1)

// 这里 & 应该是溢出操作符, bucketCount 的值 2 的 scale 次方
let bucketCount = (1 as Int) &<< scale
let wordCount = _UnsafeBitset.wordCount(forCapacity: bucketCount)
let storage = Builtin.allocWithTailElems_3(
_DictionaryStorage<Key, Value>.self,
wordCount._builtinWordValue, _HashTable.Word.self,
bucketCount._builtinWordValue, Key.self,
bucketCount._builtinWordValue, Value.self)

let metadataAddr = Builtin.projectTailElems(storage, _HashTable.Word.self)
let keysAddr = Builtin.getTailAddr_Word(
metadataAddr, wordCount._builtinWordValue, _HashTable.Word.self,
Key.self)
let valuesAddr = Builtin.getTailAddr_Word(
keysAddr, bucketCount._builtinWordValue, Key.self,
Value.self)
storage._count = 0
storage._capacity = _HashTable.capacity(forScale: scale)
storage._scale = scale
storage._reservedScale = 0
storage._extra = 0

if let age = age {
storage._age = age
} else {
// The default mutation count is simply a scrambled version of the storage
// address.
storage._age = Int32(
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
}

storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)

// Initialize hash table metadata.
storage._hashTable.clear()
return storage
}

下面我们通过 Dictionary 的插入,查找以及扩容这三个方面来看看 Swift 的具体实现。

Insert

先看下当我们通常为Dictionary给定的 key 赋值时,会发生什么。

1
2
3
4
5
6
7
8
9
public subscript(key: Key) -> Value? {
set(newValue) {
if let x = newValue {
_variant.setValue(x, forKey: key)
} else {
removeValue(forKey: key)
}
}
}

在下标操作的 set 方法中,判断如果 newValue 不为 nil 的话,则调用 _Variant 实例的 setValue 方法来更新对应的值。

而在 _Variant 的 setValue 方法中,当存储的类型不需要进行桥接时,则继续调用 _NativeDictionarysetValue 方法。

1
2
3
4
5
6
7
8
9
internal mutating func setValue(_ value: __owned Value, forKey key: Key, isUnique: Bool) {
let (bucket, found) = mutatingFind(key, isUnique: isUnique)
if found {
// 如果给定的 key 已经存在,这里直接设置 values 数组里的值来更新
(_values + bucket.offset).pointee = value
} else {
_insert(at: bucket, key: key, value: value)
}
}

这里通过 mutatingFind 查找到合适的 bucket 来插入数据,如果已经存在则只更新 values 数组的值, 不存在则需要同时更新 keys 和 values 数组中的值。

mutatingFindfind 功能类似,具体的区别下面会有更详细的介绍。

insert 具体的实现和调用的方法如下,根据bucket记录的偏移量来更新 keysvalues 数组中数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
internal func _insert(at bucket: Bucket, key: __owned Key, value: __owned Value) {
_internalInvariant(count < capacity)
hashTable.insert(bucket)
uncheckedInitialize(at: bucket, toKey: key, value: value)
// 存储的数量+1
_storage._count += 1
}

// 更新和添加新键值对最终会调用的方法
// HashTable 中 bucket 相当于记录每个槽中是否有数据,而真正的数据存储在 keys 和 values 的数组中
internal func uncheckedInitialize(at bucket: Bucket, toKey key: __owned Key, value: __owned Value) {
defer { _fixLifetime(self) }
_internalInvariant(hashTable.isValid(bucket))
// keys & value 数组对应的位置分别初始化值
(_keys + bucket.offset).initialize(to: key)
(_values + bucket.offset).initialize(to: value)
}

而在 HashTable 中的 insert 方法其实只更新了标志位中的数据,标记这个 bucket 已经存入数据。

1
2
3
4
internal func insert(_ bucket: Bucket) {
_internalInvariant(!isOccupied(bucket))
words[bucket.word].uncheckedInsert(bucket.bit)
}

下面看看查找给定 key 对应的值时,Dictionary 做了些什么。

Find

当我们通过 Dictionary 的下标操作来查找给定 key 对应的数据时,会调用 _Variantlookup 方法。

1
2
3
4
5
public subscript(key: Key) -> Value? {
get {
return _variant.lookup(key)
}
}

而在_Variant中,如果字典中存储的是原生类型,则调用 _NativeDictionary 实例的 lookup 方法。

lookup 会调用 find 方法,查找应该插入的 bucket,之后在对应的 bucket 位置更新或者添加数据就可以。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
func lookup(_ key: Key) -> Value? {
if count == 0 {
// Fast path that avoids computing the hash of the key.
return nil
}
let (bucket, found) = self.find(key)
// 不存在对应的 bucket,返回 nil
guard found else { return nil }
return self.uncheckedValue(at: bucket)
}

internal func find(_ key: Key) -> (bucket: Bucket, found: Bool) {
return find(key, hashValue: self.hashValue(for: key))
}

// 返回给定 key 的哈希值, Key 遵守 Hashable 协议
internal func hashValue(for key: Key) -> Int {
return key._rawHashValue(seed: _storage._seed)
}

// 对于给定的 key 以及其 hashValue, 查找哈希表中对应的 bucket。
// 若不存在,则返回给定 key 应该存在的 bucket
// 线性探测法
internal func find(_ key: Key, hashValue: Int) -> (bucket: Bucket, found: Bool) {
let hashTable = self.hashTable
var bucket = hashTable.idealBucket(forHashValue: hashValue)
// 在哈希表中查找是否已经存在当前给定的bucket
while hashTable._isOccupied(bucket) {
// 当前 bucket 存在,且与给定的 key 匹配,返回当前 bucket
if uncheckedKey(at: bucket) == key {
return (bucket, true)
}
// bucket 赋值为下一个位置的 bucket
bucket = hashTable.bucket(wrappedAfter: bucket)
}
return (bucket, false)
}

_HashTable 中包括了哈希冲突的处理,查找并且判断 bucket 的状态。

Swift 使用的散列函数,用与操作符代替了取模操作符,提高效率, 即 hashValue & (bucketCount - 1)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// 返回给定 hashValue 对应的 bucket, 这里使用与运算代替模运算,提供效率
// bucketMask = bucketCount - 1
internal func idealBucket(forHashValue hashValue: Int) -> Bucket {
return Bucket(offset: hashValue & bucketMask)
}

// 判断给定的 bucket 是否存在,如果不存在这是一个可以插入数据的位置
internal func _isOccupied(_ bucket: Bucket) -> Bool {
_internalInvariant(isValid(bucket))
return words[bucket.word].uncheckedContains(bucket.bit)
}

/// The next bucket after `bucket`, with wraparound at the end of the table.
// 当存在哈希冲突时,返回给定 bucket 的下一个位置的 bucket
internal func bucket(wrappedAfter bucket: Bucket) -> Bucket {
// The bucket is less than bucketCount, which is power of two less than
// Int.max. Therefore adding 1 does not overflow.
return Bucket(offset: (bucket.offset &+ 1) & bucketMask)
}

Resize

随着哈希表中元素的数量越来越多,发生碰撞的概率就越来越大,Swift 使用线性探测来处理冲突,就是一旦发生冲突,就去寻找下 一个空的散列表地址,而碰撞概率变大后,每次查找的次数也会增加,这样势必会影响哈希表的速度。

为了保证哈希表的效率,系统必须要在某个临界点进行扩容处理。该临界点在当哈希表中元素的数量等于table数组长度*加载因子。但是扩容是一个非常耗时的过程,因为它需要重新计算这些数据在新table数组中的位置并进行复制处理。所以如果我们已经预知哈希表中元素的个数,那么预设元素的个数能够有效的提高性能。

下面我们来看看 Swift 是在什么时候进行扩容并且是如何进行扩容的。

上面我们看到更新或者添加新的键值对的过程中,会调用 _NativeDictionary 中的 mutatingFind 方法。 而 mutatingFindfind 的最大区别在于它假定我们会插入容器中不存在 key/value,或者更新已经存在的 key/value。 当 mutatingFind 返回时,会确保存储空间为唯一的持有,且插入新数据时有足够的容量。

ensureUnique 方法用来确认是否需要进行扩容, 如果需要则进行扩容和重哈希的操作。 如果当前 key/valueDictionary 中不存在,我们假设需要插入这组数据,则需要的容量为当前键值对数量 + 1,即 count + 1。 如果进行了扩容及重哈希的话,返回 true

// _NativeDictionary

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
  internal mutating func mutatingFind(_ key: Key,isUnique: Bool) -> (bucket: Bucket, found: Bool) {
let (bucket, found) = find(key)

// Prepare storage.
// If `key` isn't in the dictionary yet, assume that this access will end
// up inserting it. (If we guess wrong, we might needlessly expand
// storage; that's fine.) Otherwise this can only be a removal or an
// in-place mutation.
let rehashed = ensureUnique(
isUnique: isUnique,
capacity: count + (found ? 0 : 1))
guard rehashed else { return (bucket, found) }
let (b, f) = find(key)
if f != found {
KEY_TYPE_OF_DICTIONARY_VIOLATES_HASHABLE_REQUIREMENTS(Key.self)
}
return (b, found)
}

/// Ensure storage of self is uniquely held and can hold at least `capacity`
/// elements. Returns true if contents were rehashed.
internal mutating func ensureUnique(isUnique: Bool, capacity: Int) -> Bool {
// 如果需要的容量仍小于当前容器的容量,则不需要扩容,返回false
if _fastPath(capacity <= self.capacity && isUnique) {
return false
}
// 如果Dictionary 被唯一引用,重新分配所需要的内存空间
if isUnique {
resize(capacity: capacity)
return true
}
// 所需的容量仍小于当前的容量,不需要重哈希
if capacity <= self.capacity {
copy()
return false
}
// 重哈希的过程
copyAndResize(capacity: capacity)
return true
}

internal mutating func resize(capacity: Int) {
let capacity = Swift.max(capacity, self.capacity)
let newStorage = _DictionaryStorage<Key, Value>.resize(
original: _storage,
capacity: capacity,
move: true)
let result = _NativeDictionary(newStorage)
if count > 0 {
for bucket in hashTable {
let key = (_keys + bucket.offset).move()
let value = (_values + bucket.offset).move()
result._unsafeInsertNew(key: key, value: value)
}
// Clear out old storage, ensuring that its deinit won't overrelease the
// elements we've just moved out.
_storage._hashTable.clear()
_storage._count = 0
}
_storage = result._storage
}

internal mutating func copyAndResize(capacity: Int) {
let capacity = Swift.max(capacity, self.capacity)
let newStorage = _DictionaryStorage<Key, Value>.resize(
original: _storage,
capacity: capacity,
move: false)
// 新扩容的空间
let result = _NativeDictionary(newStorage)
if count > 0 {
for bucket in hashTable {
// 在新的存储空间中插入原来 Dictionary 的键值对
result._unsafeInsertNew(
// 这里取的是扩容前的 key 和 value
key: self.uncheckedKey(at: bucket),
value: self.uncheckedValue(at: bucket))
}
}
// 保存新扩容后的存储
_storage = result._storage
}

internal func _unsafeInsertNew(key: __owned Key, value: __owned Value) {
_internalInvariant(count + 1 <= capacity)
let hashValue = self.hashValue(for: key)
if _isDebugAssertConfiguration() {
// In debug builds, perform a full lookup and trap if we detect duplicate
// elements -- these imply that the Element type violates Hashable
// requirements. This is generally more costly than a direct insertion,
// because we'll need to compare elements in case of hash collisions.
let (bucket, found) = find(key, hashValue: hashValue)
guard !found else {
KEY_TYPE_OF_DICTIONARY_VIOLATES_HASHABLE_REQUIREMENTS(Key.self)
}
hashTable.insert(bucket)
uncheckedInitialize(at: bucket, toKey: key, value: value)
} else {
// 在 HashTable 中插入对应的 bucket,标记为该bucket已存数据
let bucket = hashTable.insertNew(hashValue: hashValue)
// 在 keys & values 数组中插入键值
uncheckedInitialize(at: bucket, toKey: key, value: value)
}
_storage._count &+= 1
}

_DictionaryStorageresize 方法会调用 _HashTablescale 方法确定扩容后的总容量,然后重新分配空间。

1
2
3
4
5
static internal func resize(original: __RawDictionaryStorage, capacity: Int, move: Bool) -> _DictionaryStorage {
let scale = _HashTable.scale(forCapacity: capacity)
// 根据需要扩容的倍数来重新分配空间
return allocate(scale: scale, age: nil, seed: nil)
}

_HashTable 中定义的最大装载因子和计算容量以及扩展倍数的方法。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// 返回最大装载因子0.75
private static var maxLoadFactor: Double {
get { return 3 / 4 }
}
// 返回扩容后的最大容量
// scale 表示扩容的倍数,进行2倍的增长,容量 * 最大装载因子 = 最大容量
internal static func capacity(forScale scale: Int8) -> Int {
let bucketCount = (1 as Int) &<< scale
return Int(Double(bucketCount) * maxLoadFactor)
}

internal static func scale(forCapacity capacity: Int) -> Int8 {
let capacity = Swift.max(capacity, 1)
// 计算给定容量时和加载因子时所需要分配的最少空间
let minimumEntries = Swift.max(
Int((Double(capacity) / maxLoadFactor).rounded(.up)),
capacity + 1)
// The actual number of entries we need to allocate is the lowest power of
// two greater than or equal to the minimum entry count. Calculate its
// exponent.
// `binaryLogarithm` 即求log2 n
// 这里是log2 N 求所容量需要增加的倍数
// (Swift.max(minimumEntries, 2) - 1)._binaryLogarithm() 当前增长的倍数, +1 表示扩容后的
let exponent = (Swift.max(minimumEntries, 2) - 1)._binaryLogarithm() + 1
_internalInvariant(exponent >= 0 && exponent < Int.bitWidth)
// The scale is the exponent corresponding to the bucket count.
let scale = Int8(truncatingIfNeeded: exponent)
_internalInvariant(self.capacity(forScale: scale) >= capacity)
return scale
}

internal func insertNew(hashValue: Int) -> Bucket {
let hole = nextHole(atOrAfter: idealBucket(forHashValue: hashValue))
insert(hole)
return hole
}

通过以上我们知道哈希表的容量必须是2的幂,那么为什么要这么设计呢?答案当然是为了性能。通过键的哈希值进行定位桶位置的时候,调用了一个 idealBucket(forHashValue hashValue: Int) 方法。

1
2
3
internal func idealBucket(forHashValue hashValue: Int) -> Bucket {
return Bucket(offset: hashValue & bucketMask)
}

可以看到这里是将哈希值 hashValue 与桶数组的 length-1(实际上也是哈希表的容量-1)进行了一个与操作得出了对应的桶的位置,hash & (bucketCount-1)

但是为什么不采用 hashValue % bucketCount 这种计算方式呢? 因为相比 %/ 操作,采用&运算会提高性能。

通过限制 bucketCount 是一个2的幂数,hashValue & (bucketCount - 1)和 hashValue % bucketCount 结果是一致的。这就是为什么要限制容量必须是一个2的幂的原因。

举个简单的例子说明这两个操作的结果一致性:

假设有个 hashcode 是 311,对应的二进制是(1 0011 0111)

length 为 16,对应的二进制位(1 0000)

%操作:311 = 16 * 19 + 7;所以结果为7,二进制位(0111);

&操作:(1 0011 0111) & (0111) = 0111 = 7, 二进制位(0111)

1 0011 0111 = (1 0011 0000) + (0111) = (12^4 + 1 2^5 + 02^6 + 02^7 + 12^8 ) + 7 = 2^4(1 + 2 + 0 + 0 + 16) + 7 = 16 * 19 + 7; 和%操作一致。

如果length是一个2的幂的数,那么 length-1 就会变成一个 mask, 它会将 hashcode 低位取出来,hashcode 的低位实际就是余数,和取余操作相比,与操作会将性能提升很多。

最后看看 Dictionary 提供的一些具体的方法。

Dictionary

Dictionary 初始化方法:

1
2
3
4
5
6
7
8
9
10
/// Creates an empty dictionary.
public init() {
self.init(_native: _NativeDictionary())
}

// 创建一个空dict并提前指定最少的元素个数
// 当你知道创建dict后要添加多少元素时,可以通过这个初始化方法来避免创建后的存储缓冲重新分配
public init(minimumCapacity: Int) {
_variant = _Variant(native: _NativeDictionary(capacity: minimumCapacity))
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
public init<S: Sequence>(
uniqueKeysWithValues keysAndValues: __owned S
) where S.Element == (Key, Value) {
if let d = keysAndValues as? Dictionary<Key, Value> {
self = d
return
}
var native = _NativeDictionary<Key, Value>(
capacity: keysAndValues.underestimatedCount)
// '_MergeError.keyCollision' is caught and handled with an appropriate
// error message one level down, inside native.merge(_:...). We throw an
// error instead of calling fatalError() directly because we want the
// message to include the duplicate key, and the closure only has access to
// the conflicting values.
try! native.merge(
keysAndValues,
isUnique: true,
uniquingKeysWith: { _, _ in throw _MergeError.keyCollision })
self.init(_native: native)
}

通过给定的序列来创建 dict 时调用的初始化方法。

当我们想要通过给定键值对序列来创建 dict 时,可以通过指定 combine 闭包来决定在存在重复 Key 的情况下如何选择保留想要的 Value.

1
2
3
4
5
6
let pairsWithDuplicateKeys = [("a", 1), ("b", 2), ("a", 3), ("b", 4)]

let firstValues = Dictionary(pairsWithDuplicateKeys, uniquingKeysWith: { (current, _) in current })
// ["b": 2, "a": 1]
let lastValues = Dictionary(pairsWithDuplicateKeys, uniquingKeysWith: { (_, new) in new })
// ["b": 4, "a": 3]

combine 闭包接收当前值与新的Value为参数,并返回在最终dict中需要保留的值。

调用的初始化方法如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
public init<S: Sequence>(
_ keysAndValues: __owned S,
uniquingKeysWith combine: (Value, Value) throws -> Value
) rethrows where S.Element == (Key, Value) {
var native = _NativeDictionary<Key, Value>(
capacity: keysAndValues.underestimatedCount)
try native.merge(keysAndValues, isUnique: true, uniquingKeysWith: combine)
self.init(_native: native)
}

// _NativeDictionary
internal mutating func merge<S: Sequence>(
_ keysAndValues: __owned S,
isUnique: Bool,
uniquingKeysWith combine: (Value, Value) throws -> Value
) rethrows where S.Element == (Key, Value) {
var isUnique = isUnique
for (key, value) in keysAndValues {
//
let (bucket, found) = mutatingFind(key, isUnique: isUnique)
isUnique = true
if found {
do {
// 如果查找到有重复的key, newValue 设置为 combine 闭包中返回的值
let newValue = try combine(uncheckedValue(at: bucket), value)
_values[bucket.offset] = newValue
} catch _MergeError.keyCollision {
fatalError("Duplicate values for key: '\(k∏ey)'")
}
} else {
// 如果没有重复的key,直接将其插入到表中
_insert(at: bucket, key: key, value: value)
}
}
}
1
2
3
4
5
6
public init<S: Sequence>(
grouping values: __owned S,
by keyForValue: (S.Element) throws -> Key
) rethrows where Value == [S.Element] {
try self.init(_native: _NativeDictionary(grouping: values, by: keyForValue))
}

可以通过闭包来返回序列中元素在dict的Key。

1
2
3
let students = ["Kofi", "Abena", "Efua", "Kweku", "Akosua"]
let studentsByLetter = Dictionary(grouping: students, by: { $0.first! })
// ["E": ["Efua"], "K": ["Kofi", "Kweku"], "A": ["Abena", "Akosua"]]

当需要返回满足指定条件的键值对的新dict时:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
public __consuming func filter(
_ isIncluded: (Element) throws -> Bool
) rethrows -> [Key: Value] {
// FIXME(performance): Try building a bitset of elements to keep, so that we
// eliminate rehashings during insertion.
var result = _NativeDictionary<Key, Value>()
for element in self {
if try isIncluded(element) {
// 满足条件的键值对添加到result中
result.insertNew(key: element.key, value: element.value)
}
}
return Dictionary(_native: result)
}
}

同时, Dictionary 满足 Collection 协议:

1
2
3
4
5
6
7
8
9
10
11
12
13
extension Dictionary: Collection {
public typealias SubSequence = Slice<Dictionary>

@inlinable
public var startIndex: Index {
return _variant.startIndex
}

@inlinable
public var endIndex: Index {
return _variant.endIndex
}
}

Dictionary 的下标操作符实现, 通过 _Variant 实现的 _DictionaryBuffer 协议定义的接口方法, 根据字典中的具体类型来调用对应的 asNativeasCocoa 对应的方法。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
public subscript(key: Key) -> Value? {
get {
return _variant.lookup(key)
}
set(newValue) {
if let x = newValue {
_variant.setValue(x, forKey: key)
} else {
removeValue(forKey: key)
}
}
_modify {
defer { _fixLifetime(self) }
yield &_variant[key]
}
}

通过给定的key去访问对应的值时,如果没有给定的key,则返回默认的值。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
public subscript(
key: Key, default defaultValue: @autoclosure () -> Value
) -> Value {
@inline(__always)
get {
return _variant.lookup(key) ?? defaultValue()
}
@inline(__always)
_modify {
let (bucket, found) = _variant.mutatingFind(key)
let native = _variant.asNative
if !found {
let value = defaultValue()
native._insert(at: bucket, key: key, value: value)
}
let address = native._values + bucket.offset
defer { _fixLifetime(self) }
yield &address.pointee
}
}

用法:

1
2
3
4
5
6
7
8
9
10
11
12
13
var responseMessages = [200: "OK",
403: "Access forbidden",
404: "File not found",
500: "Internal server error"]

let httpResponseCodes = [200, 403, 301]
for code in httpResponseCodes {
let message = responseMessages[code, default: "Unknown response"]
print("Response \(code): \(message)")
}
// Prints "Response 200: OK"
// Prints "Response 403: Access Forbidden"
// Prints "Response 301: Unknown response"

返回通过给定的 transform 闭包对字典中的 Value 进行映射后的新字典。时间复杂度 O(n)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  public func mapValues<T>(
_ transform: (Value) throws -> T
) rethrows -> Dictionary<Key, T> {
return try Dictionary<Key, T>(_native: _variant.mapValues(transform))
}

// Dictionary._Varient
internal func mapValues<T>(
_ transform: (Value) throws -> T
) rethrows -> _NativeDictionary<Key, T> {
#if _runtime(_ObjC)
guard isNative else {
return try asCocoa.mapValues(transform)
}
#endif
return try asNative.mapValues(transform)
}


// _NativeDictionary
internal func mapValues<T>(
_ transform: (Value) throws -> T
) rethrows -> _NativeDictionary<Key, T> {
let resultStorage = _DictionaryStorage<Key, T>.copy(original: _storage)
_internalInvariant(resultStorage._seed == _storage._seed)
let result = _NativeDictionary<Key, T>(resultStorage)
// Because the current and new buffer have the same scale and seed, we can
// initialize to the same locations in the new buffer, skipping hash value
// recalculations.
for bucket in hashTable {
let key = self.uncheckedKey(at: bucket)
let value = self.uncheckedValue(at: bucket)
try result._insert(at: bucket, key: key, value: transform(value))
}
return result
}

对应的compactMap版本,只返回非 nil 的 value。

1
2
3
4
5
6
7
8
9
10
11
public func compactMapValues<T>(
_ transform: (Value) throws -> T?
) rethrows -> Dictionary<Key, T> {
let result: _NativeDictionary<Key, T> =
try self.reduce(into: _NativeDictionary<Key, T>()) { (result, element) in
if let value = try transform(element.value) {
result.insertNew(key: element.key, value: value)
}
}
return Dictionary<Key, T>(_native: result)
}

使用实例:

1
2
3
4
5
6
7
let data = ["a": "1", "b": "three", "c": "///4///"]

let m: [String: Int?] = data.mapValues { str in Int(str) }
// ["a": 1, "b": nil, "c": nil]

let c: [String: Int] = data.compactMapValues { str in Int(str) }
// ["a": 1]

同上面的初始化方法, merge方法将键值对序列/字典合并到当前字典中,并且可以通过 combine 闭包来知道想要的 value。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
public mutating func merge<S: Sequence>(
_ other: __owned S,
uniquingKeysWith combine: (Value, Value) throws -> Value
) rethrows where S.Element == (Key, Value) {
try _variant.merge(other, uniquingKeysWith: combine)
}

public mutating func merge(
_ other: __owned [Key: Value],
uniquingKeysWith combine: (Value, Value) throws -> Value
) rethrows {
try _variant.merge(
other.lazy.map { ($0, $1) }, uniquingKeysWith: combine)
}

合并当前字典与键值对序列/字典并且返回新的字典。

1
2
3
4
5
6
7
8
public __consuming func merging<S: Sequence>(
_ other: __owned S,
uniquingKeysWith combine: (Value, Value) throws -> Value
) rethrows -> [Key: Value] where S.Element == (Key, Value) {
var result = self
try result._variant.merge(other, uniquingKeysWith: combine)
return result
}

示例:

1
2
3
4
5
6
7
let dictionary = ["a": 1, "b": 2]
let otherDictionary = ["a": 3, "b": 4]

let keepingCurrent = dictionary.merging(otherDictionary) { (current, _) in current }
// ["b": 2, "a": 1]
let replacingCurrent = dictionary.merging(otherDictionary) { (_, new) in new }
// ["b": 4, "a": 3]

返回当前字典索引key或者value的集合

1
2
3
4
5
6
7
8
9
10
11
12
13
public var keys: Keys {
// FIXME(accessors): Provide a _read
get {
return Keys(_dictionary: self)
}
}

public var values: Values {
// FIXME(accessors): Provide a _read
get {
return Values(_dictionary: self)
}
}

翻完 Dictionary 的源码之后,感觉对 Swift 中哈希表的具体实现有了更清楚的认识,还有一些之前并没有注意到的比较实用的方法,如 mergegrouping等。以后要多花点时间来多看看标准库中的具体实现。