看了ceph的关于snap的一些代码(snap.cc、snaprealm.cc、snapserver.cc、snapmapper.cc),作了标注,在此记录。
snap.cc主要实现snap相关数据结构的encode(序列化)和decode(反序列化):encode将数据结构表示为二进制流,以便通过网络传输或保存在磁盘等存储介质上;decode则从二进制流中还原出数据结构。
以下是snaprealm.h的一些结构体的声明和注释
// Excerpt of the SnapRealm declaration: only the data members are shown; the
// listing stops before the member functions and the closing brace.
1 struct SnapRealm {
2 // realm state
3
// Realm record; SnapRealm.cc reads its seq, created, last_created,
// last_destroyed, current_parent_since, snaps and past_parents fields.
4 sr_t srnode;
5
6 // in-memory state
7 MDCache *mdcache;
8 CInode *inode;
9
10 bool open; // set to true once all past_parents are opened
11 SnapRealm *parent;
12 set<SnapRealm*> open_children; // active children that are currently open
13 map<inodeno_t,SnapRealm*> open_past_parents; // these are explicitly pinned.
14
15 // cache
16 snapid_t cached_seq; // max seq over self and all past+present parents.
17 snapid_t cached_last_created; // max last_created over all past+present parents
18 snapid_t cached_last_destroyed; // max last_destroyed, accumulated the same way by check_cache()
19 set<snapid_t> cached_snaps; // snapids collected by build_snap_set() during check_cache()
20 SnapContext cached_snap_context; // built lazily by get_snap_context(); cleared by check_cache()
21
22 bufferlist cached_snap_trace; // encoded trace, rebuilt by check_cache() via build_snap_trace()
23
24 elist<CInode*> inodes_with_caps; // for efficient realm splits
25 map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications
snaprealm.cc:
1 #include "SnapRealm.h"
2 #include "MDCache.h"
3 #include "MDS.h"
4
5 #include "messages/MClientSnap.h"
6
7
8 /*
9 * SnapRealm
10 */
11
12 #define dout_subsys ceph_subsys_mds
13 #undef dout_prefix
14 #define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
15 static ostream& _prefix(std::ostream *_dout, int whoami, CInode *inode,
16 uint64_t seq, SnapRealm *realm) {
17 return *_dout << " mds." << whoami
18 << ".cache.snaprealm(" << inode->ino()
19 << " seq " << seq << " " << realm << ") ";
20 }
21
22 ostream& operator<<(ostream& out, const SnapRealm& realm)
23 {
24 out << "snaprealm(" << realm.inode->ino()
25 << " seq " << realm.srnode.seq
26 << " lc " << realm.srnode.last_created
27 << " cr " << realm.srnode.created;
28 if (realm.srnode.created != realm.srnode.current_parent_since)
29 out << " cps " << realm.srnode.current_parent_since;
30 out << " snaps=" << realm.srnode.snaps;
31 if (realm.srnode.past_parents.size()) {
32 out << " past_parents=(";
33 for (map<snapid_t, snaplink_t>::const_iterator p = realm.srnode.past_parents.begin();
34 p != realm.srnode.past_parents.end();
35 ++p) {
36 if (p != realm.srnode.past_parents.begin()) out << ",";
37 out << p->second.first << "-" << p->first
38 << "=" << p->second.ino;
39 }
40 out << ")";
41 }
42 out << " " << &realm << ")";
43 return out;
44 }
45
46 //添加parent到open_past_parents的map容器中
47 void SnapRealm::add_open_past_parent(SnapRealm *parent)
48 {
49 open_past_parents[parent->inode->ino()] = parent;
50 parent->inode->get(CInode::PIN_PASTSNAPPARENT); //标记为CInode::PIN_PASTSNAPPARENT
51 }
52
53 struct C_SR_RetryOpenParents : public MDSInternalContextBase {
54 SnapRealm *sr;
55 snapid_t first, last, parent_last;
56 inodeno_t parent;
57 MDSInternalContextBase* fin;
58 C_SR_RetryOpenParents(SnapRealm *s, snapid_t f, snapid_t l, snapid_t pl,
59 inodeno_t p, MDSInternalContextBase *c) :
60 sr(s), first(f), last(l), parent_last(pl), parent(p), fin(c) {}
61 MDS *get_mds() { return sr->mdcache->mds; }
62 void finish(int r) {
63 if (r < 0)
64 sr->_remove_missing_parent(parent_last, parent, r);
65 if (sr->_open_parents(fin, first, last))
66 fin->complete(0);
67 }
68 };
69
70 //删除找不到snapid的parent
71 void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int err)
72 {
73 map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.find(snapid);
74 if (p != srnode.past_parents.end()) {
75 dout(10) << __func__ << " " << parent << " [" << p->second.first << ","
76 << p->first << "] errno " << err << dendl;
77 srnode.past_parents.erase(p);
78 } else {
79 dout(10) << __func__ << " " << parent << " not found" << dendl;
80 }
81 }
82
83 // Make sure every realm this one inherits snapshots from is open.
// Returns true when everything is open (and latches `open` so later calls
// return immediately). Returns false when a past parent's inode is not in
// cache yet; in that case an open_ino() is issued with a
// C_SR_RetryOpenParents wrapping `finish`, which re-runs this function later.
// `finish` is never completed here. `first`/`last` are logged, forwarded to
// the current parent and stored in the retry context; the past-parent loop
// below walks every link regardless of the interval.
84 bool SnapRealm::_open_parents(MDSInternalContextBase *finish, snapid_t first, snapid_t last)
85 {
86 dout(10) << "open_parents [" << first << "," << last << "]" << dendl;
87 if (open)
88 return true;
89
90 // make sure my current parents' parents are open...
91 if (parent) {
92 dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent
93 << " on " << *parent->inode << dendl;
// only recurse when the requested interval reaches into the current parent's tenure
94 if (last >= srnode.current_parent_since &&
95 !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last))
96 return false;
97 }
98
99 // and my past parents too!
100 assert(srnode.past_parents.size() >= open_past_parents.size());
101 if (srnode.past_parents.size() > open_past_parents.size()) {
102 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
103 p != srnode.past_parents.end();
104 ++p) {
105 dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is "
106 << p->second.ino << dendl;
// NOTE: this local `parent` (a CInode*) shadows the member SnapRealm::parent inside the loop
107 CInode *parent = mdcache->get_inode(p->second.ino);
108 if (!parent) {
// not cached: fetch it and retry from the top once the fetch completes
109 C_SR_RetryOpenParents *fin = new C_SR_RetryOpenParents(this, first, last, p->first,
110 p->second.ino, finish);
111 mdcache->open_ino(p->second.ino, mdcache->mds->mdsmap->get_metadata_pool(), fin);
112 return false;
113 }
114 assert(parent->snaprealm); // hmm!
// the past parent must itself be fully open over the interval it was our parent
115 if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first))
116 return false;
117 if (!open_past_parents.count(p->second.ino)) {
118 add_open_past_parent(parent->snaprealm);
119 }
120 }
121 }
122
123 open = true;
124 return true;
125 }
126
127 //判断从first到last的snap的past_parents是否open
128 bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last)
129 {
130 dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl;
131 if (open)
132 return true;
133
134 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
135 p != srnode.past_parents.end();
136 ++p) {
137 if (p->second.first > last)
138 break;
139 dout(10) << " past parent [" << p->second.first << "," << p->first << "] was "
140 << p->second.ino << dendl;
141 if (open_past_parents.count(p->second.ino) == 0) {
142 dout(10) << " past parent " << p->second.ino << " is not open" << dendl;
143 return false;
144 }
145 if (!open_past_parents[p->second.ino]->have_past_parents_open(MAX(first, p->second.first),
146 MIN(last, p->first)))
147 return false;
148 }
149
150 open = true;
151 return true;
152 }
153
154
155 void SnapRealm::close_parents()
156 {
157 for (map<inodeno_t,SnapRealm*>::iterator p = open_past_parents.begin();
158 p != open_past_parents.end();
159 ++p)
160 p->second->inode->put(CInode::PIN_PASTSNAPPARENT); //解除标记CInode::PIN_PASTSNAPPARENT
161 open_past_parents.clear(); //清空
162 }
163
164
165 /*
166 * get list of snaps for this realm. we must include parents' snaps
167 * for the intervals during which they were our parent.
168 */
169 //将realm中的snaps取出并存入第一个参数set容器中
170 void SnapRealm::build_snap_set(set<snapid_t> &s,
171 snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
172 snapid_t first, snapid_t last)
173 {
174 dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl;
175
176 if (srnode.seq > max_seq)
177 max_seq = srnode.seq;
178 if (srnode.last_created > max_last_created)
179 max_last_created = srnode.last_created;
180 if (srnode.last_destroyed > max_last_destroyed)
181 max_last_destroyed = srnode.last_destroyed;
182
183 // include my snaps within interval [first,last]
184 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
185 p != srnode.snaps.end() && p->first <= last;
186 ++p)
187 s.insert(p->first);
188
189 // include snaps for parents during intervals that intersect [first,last]
190 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
191 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
192 ++p) {
193 CInode *oldparent = mdcache->get_inode(p->second.ino);
194 assert(oldparent); // call open_parents first!
195 assert(oldparent->snaprealm);
196 oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
197 MAX(first, p->second.first),
198 MIN(last, p->first)); //递归,将past_parent的snapid加入set容器
199 }
200 if (srnode.current_parent_since <= last && parent)
201 parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
202 MAX(first, srnode.current_parent_since), last); //递归,将current_parent的snapid加入set容器
203 }
204
205
206 //检查cache中的数据结构并更新cached_snaps和cached_snap_trace
207 void SnapRealm::check_cache()
208 {
209 if (cached_seq >= srnode.seq)
210 return;
211
212 cached_snaps.clear();
213 cached_snap_context.clear();
214
215 cached_last_created = srnode.last_created;
216 cached_last_destroyed = srnode.last_destroyed;
217 cached_seq = srnode.seq;
218 build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed,
219 0, CEPH_NOSNAP);
220
221 cached_snap_trace.clear();
222 build_snap_trace(cached_snap_trace);
223
224 dout(10) << "check_cache rebuilt " << cached_snaps
225 << " seq " << srnode.seq
226 << " cached_seq " << cached_seq
227 << " cached_last_created " << cached_last_created
228 << " cached_last_destroyed " << cached_last_destroyed
229 << ")" << dendl;
230 }
231
232
233 const set<snapid_t>& SnapRealm::get_snaps()
234 {
235 check_cache();
236 dout(10) << "get_snaps " << cached_snaps
237 << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")"
238 << dendl;
239 return cached_snaps;
240 }
241
242 /*
243 * build vector in reverse sorted order
244 */
245 //循环将存储snapid的set中的内容放入返回值cached_snap_context中
246 const SnapContext& SnapRealm::get_snap_context()
247 {
248 check_cache();
249
250 if (!cached_snap_context.seq) {
251 cached_snap_context.seq = cached_seq;
252 cached_snap_context.snaps.resize(cached_snaps.size());
253 unsigned i = 0;
254 for (set<snapid_t>::reverse_iterator p = cached_snaps.rbegin();
255 p != cached_snaps.rend();
256 ++p)
257 cached_snap_context.snaps[i++] = *p;
258 }
259
260 return cached_snap_context;
261 }
262
263 //得到cache_snap,并将其info存入infomap的map容器中
264 void SnapRealm::get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first, snapid_t last)
265 {
266 const set<snapid_t>& snaps = get_snaps();
267 dout(10) << "get_snap_info snaps " << snaps << dendl;
268
269 // include my snaps within interval [first,last]
270 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
271 p != srnode.snaps.end() && p->first <= last;
272 ++p)
273 infomap[p->first] = &p->second; //令键为p->first的infomap的值等于&p->second
274
275 // include snaps for parents during intervals that intersect [first,last]
276 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
277 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
278 ++p) {
279 CInode *oldparent = mdcache->get_inode(p->second.ino);
280 assert(oldparent); // call open_parents first!
281 assert(oldparent->snaprealm);
282 oldparent->snaprealm->get_snap_info(infomap,
283 MAX(first, p->second.first),
284 MIN(last, p->first));
285 }
286 if (srnode.current_parent_since <= last && parent)
287 parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last);
288 }
289
290 const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino)
291 {
292 if (srnode.snaps.count(snapid)) {
293 if (atino == inode->ino())
294 return srnode.snaps[snapid].name;
295 else
296 return srnode.snaps[snapid].get_long_name();
297 }
298
299 map<snapid_t,snaplink_t>::iterator p = srnode.past_parents.lower_bound(snapid);
300 if (p != srnode.past_parents.end() && p->second.first <= snapid) {
301 CInode *oldparent = mdcache->get_inode(p->second.ino);
302 assert(oldparent); // call open_parents first!
303 assert(oldparent->snaprealm);
304 return oldparent->snaprealm->get_snapname(snapid, atino);
305 }
306
307 assert(srnode.current_parent_since <= snapid);
308 assert(parent);
309 return parent->get_snapname(snapid, atino);
310 }
311
312 //判断第一个参数n,在srnode.snaps从first到last范围内找出对应的snapid
313 snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last)
314 {
315 // first try me
316 dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl;
317
318 //snapid_t num;
319 //if (n[0] == '~') num = atoll(n.c_str()+1);
320
321 bool actual = (atino == inode->ino());
322 string pname;
323 inodeno_t pino;
324 if (!actual) {
325 if (!n.length() ||
326 n[0] != '_') return 0;
327 int next_ = n.find('_', 1);
328 if (next_ < 0) return 0;
329 pname = n.substr(1, next_ - 1);
330 pino = atoll(n.c_str() + next_ + 1);
331 dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl;
332 }
333
334 //根据snapinfo得到snapid
335 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
336 p != srnode.snaps.end() && p->first <= last;
337 ++p) {
338 dout(15) << " ? " << p->second << dendl;
339 //if (num && p->second.snapid == num)
340 //return p->first;
341 if (actual && p->second.name == n)
342 return p->first;
343 if (!actual && p->second.name == pname && p->second.ino == pino)
344 return p->first;
345 }
346
347 // include snaps for parents during intervals that intersect [first,last]
348 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
349 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
350 ++p) {
351 CInode *oldparent = mdcache->get_inode(p->second.ino);
352 assert(oldparent); // call open_parents first!
353 assert(oldparent->snaprealm);
354 snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino,
355 MAX(first, p->second.first),
356 MIN(last, p->first));
357 if (r)
358 return r;
359 }
360 if (parent && srnode.current_parent_since <= last)
361 return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last);
362 return 0;
363 }
364
365
366 //调整parent令变量parent等于当前parent
367 void SnapRealm::adjust_parent()
368 {
369 SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm();
370 if (newparent != parent) {
371 dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
372 if (parent)
373 parent->open_children.erase(this);
374 parent = newparent;
375 if (parent)
376 parent->open_children.insert(this);
377
378 invalidate_cached_snaps();
379 }
380 }
381
382 // Carve the new realm `child` out of this realm: hand over to `child` the
// open child realms and the inodes with caps that now fall underneath it.
383 void SnapRealm::split_at(SnapRealm *child)
384 {
385 dout(10) << "split_at " << *child
386 << " on " << *child->inode << dendl;
387
388 if (inode->is_mdsdir() || !child->inode->is_dir()) {
389 // this realm is an mdsdir, or child is not a dir.
390 if (child->inode->containing_realm) {
391 // - no open children.
392 // - only need to move this child's inode's caps.
393 child->inode->move_to_realm(child); // leave containing_realm and join the child realm
394 } else {
395 // no caps, nothing to move/split.
396 dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl;
397 assert(!child->inode->is_any_caps());
398 }
399 return;
400 }
401
402 // it's a dir.
403
404 // split open_children
405 dout(10) << " open_children are " << open_children << dendl;
406 for (set<SnapRealm*>::iterator p = open_children.begin();
407 p != open_children.end(); ) {
408 SnapRealm *realm = *p;
409 if (realm != child &&
410 child->inode->is_projected_ancestor_of(realm->inode)) {
411 dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
412 realm->parent = child;
413 child->open_children.insert(realm);
// post-increment so the iterator is advanced before its element is erased
414 open_children.erase(p++);
415 } else {
416 dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl;
417 ++p;
418 }
419 }
420
421 // split inodes_with_caps
422 elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps));
423 while (!p.end()) {
424 CInode *in = *p;
// advance before a possible move_to_realm(); presumably the move unlinks `in` from this list (confirm)
425 ++p;
426
427 // does inode fall within the child realm?
428 bool under_child = false;
429
430 if (in == child->inode) {
431 under_child = true;
432 } else {
// walk up the parent dentries until we hit child's inode or run out of parents
433 CInode *t = in;
434 while (t->get_parent_dn()) {
435 t = t->get_parent_dn()->get_dir()->get_inode();
436 if (t == child->inode) {
437 under_child = true;
438 break;
439 }
// guard against walking in a cycle back to the starting inode
440 if (t == in)
441 break;
442 }
443 }
444 if (under_child) {
445 dout(20) << " child gets " << *in << dendl;
446 in->move_to_realm(child); // leave containing_realm and join the child realm
447 } else {
448 dout(20) << " keeping " << *in << dendl;
449 }
450 }
451
452 }
453
454 const bufferlist& SnapRealm::get_snap_trace()
455 {
456 check_cache();
457 return cached_snap_trace;
458 }
459
460 void SnapRealm::build_snap_trace(bufferlist& snapbl) //将snaps和所有parents存储在snapbl中
461 {
462 SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
463
464 if (parent) {
465 info.h.parent = parent->inode->ino();
466 if (!srnode.past_parents.empty()) {
467 snapid_t last = srnode.past_parents.rbegin()->first;
468 set<snapid_t> past;
469 snapid_t max_seq, max_last_created, max_last_destroyed;
470 build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last);
471 info.prior_parent_snaps.reserve(past.size());
472 for (set<snapid_t>::reverse_iterator p = past.rbegin(); p != past.rend(); ++p)
473 info.prior_parent_snaps.push_back(*p);
474 dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] "
475 << info.prior_parent_snaps << dendl;
476 }
477 } else
478 info.h.parent = 0;
479
480 info.my_snaps.reserve(srnode.snaps.size());
481 for (map<snapid_t,SnapInfo>::reverse_iterator p = srnode.snaps.rbegin();
482 p != srnode.snaps.rend();
483 ++p)
484 info.my_snaps.push_back(p->first);
485 dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
486
487 ::encode(info, snapbl);
488
489 if (parent)
490 parent->build_snap_trace(snapbl);
491 }
492
493
494 //删除past_parents中有但cached_snaps中没有的
495 void SnapRealm::prune_past_parents()
496 {
497 dout(10) << "prune_past_parents" << dendl;
498 check_cache();
499 assert(open);
500
501 map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
502 while (p != srnode.past_parents.end()) {
503 set<snapid_t>::iterator q = cached_snaps.lower_bound(p->second.first);
504 if (q == cached_snaps.end() || //若cached_snaps中没有p指向的值则erase
505 *q > p->first) {
506 dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first
507 << "] " << p->second.ino << dendl;
508 srnode.past_parents.erase(p++);
509 } else {
510 dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first
511 << "] " << p->second.ino << dendl;
512 ++p;
513 }
514 }
515 }
snapserver.cc:
1 #include "SnapServer.h"
2 #include "MDS.h"
3 #include "osd/OSDMap.h"
4 #include "osdc/Objecter.h"
5 #include "mon/MonClient.h"
6
7 #include "include/types.h"
8 #include "messages/MMDSTableRequest.h"
9 #include "messages/MRemoveSnaps.h"
10
11 #include "msg/Messenger.h"
12
13 #include "common/config.h"
14 #include "include/assert.h"
15
// Debug-output setup for this file: use the MDS subsystem and start every
// dout() line with "mds.<rank>.snap " (`rank` is resolved at each call site).
16 #define dout_subsys ceph_subsys_mds
17 #undef dout_prefix
18 #define dout_prefix *_dout << "mds." << rank << ".snap "
19
20
21 void SnapServer::reset_state() //重置状态,将snaps和need_to_purge中的内容清空
22 {
23 last_snap = 1; /* snapid 1 reserved for initial root snaprealm */
24 snaps.clear();
25 need_to_purge.clear();
26
27 // find any removed snapshot in data pools
28 snapid_t first_free = 0;
29 const OSDMap *osdmap = mds->objecter->get_osdmap_read();
30 for (set<int64_t>::const_iterator p = mds->mdsmap->get_data_pools().begin();
31 p != mds->mdsmap->get_data_pools().end();
32 ++p) {
33 const pg_pool_t *pi = osdmap->get_pg_pool(*p);
34 if (!pi->removed_snaps.empty() &&
35 pi->removed_snaps.range_end() > first_free)
36 first_free = pi->removed_snaps.range_end();
37 }
38 mds->objecter->put_osdmap_read();
39 if (first_free > last_snap)
40 last_snap = first_free; //扩展last_snap范围
41 }
42
43
44 // SERVER
45 //根据操作执行:创建-将bl中的内容加入到pending_creat中,销毁-将bl中的内容加入到pending_destroy中
46 void SnapServer::_prepare(bufferlist &bl, uint64_t reqid, mds_rank_t bymds)
47 {
48 bufferlist::iterator p = bl.begin();
49 __u32 op;
50 ::decode(op, p);
51
52 switch (op) {
53 case TABLE_OP_CREATE:
54 {
55 version++;
56
57 SnapInfo info;
58 ::decode(info.ino, p);
59 if (!p.end()) {
60 ::decode(info.name, p);
61 ::decode(info.stamp, p);
62 info.snapid = ++last_snap;
63 pending_create[version] = info;
64 dout(10) << "prepare v" << version << " create " << info << dendl;
65 } else {
66 pending_noop.insert(version);
67 dout(10) << "prepare v" << version << " noop" << dendl;
68 }
69 bl.clear();
70 ::encode(last_snap, bl);
71 }
72 break;
73
74 case TABLE_OP_DESTROY:
75 {
76 inodeno_t ino;
77 snapid_t snapid;
78 ::decode(ino, p); // not used, currently.
79 ::decode(snapid, p);
80 version++;
81
82 // bump last_snap... we use it as a version value on the snaprealm.
83 ++last_snap;
84
85 pending_destroy[version] = pair<snapid_t,snapid_t>(snapid, last_snap);
86 dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl;
87
88 bl.clear();
89 ::encode(last_snap, bl);
90 }
91 break;
92
93 default:
94 assert(0);
95 }
96 //dump();
97 }
98
99 //返回容器中此id出现的次数
100 bool SnapServer::_is_prepared(version_t tid)
101 {
102 return
103 pending_create.count(tid) ||
104 pending_destroy.count(tid);
105 }
106
107 //将操作pending_create或pending_destroy的内容存入对应的结构中
108 bool SnapServer::_commit(version_t tid, MMDSTableRequest *req)
109 {
110 if (pending_create.count(tid)) { //若此tid在pending_creat中有值,清空并将此结构中对应snapid中的内容存入snaps中
111 dout(7) << "commit " << tid << " create " << pending_create[tid] << dendl;
112 snaps[pending_create[tid].snapid] = pending_create[tid];
113 pending_create.erase(tid);
114 }
115
116 else if (pending_destroy.count(tid)) { //若此tid在pending_creat中有值,清空并将对应内容加入到need_to_purge
117 snapid_t sn = pending_destroy[tid].first; //removed_snap
118 snapid_t seq = pending_destroy[tid].second; //seq
119 dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl;
120 snaps.erase(sn);
121
122 for (set<int64_t>::const_iterator p = mds->mdsmap->get_data_pools().begin();
123 p != mds->mdsmap->get_data_pools().end();
124 ++p) {
125 need_to_purge[*p].insert(sn);
126 need_to_purge[*p].insert(seq);
127 }
128
129 pending_destroy.erase(tid);
130 }
131 else if (pending_noop.count(tid)) { //清空pending_noop
132 dout(7) << "commit " << tid << " noop" << dendl;
133 pending_noop.erase(tid);
134 }
135 else
136 assert(0);
137
138 // bump version.
139 version++;
140 //dump();
141 return true;
142 }
143
144 //输出要执行的回滚命令并清空pending_create等结构中的内容,不执行具体操作
145 void SnapServer::_rollback(version_t tid)
146 {
147 if (pending_create.count(tid)) {
148 dout(7) << "rollback " << tid << " create " << pending_create[tid] << dendl;
149 pending_create.erase(tid);
150 }
151
152 else if (pending_destroy.count(tid)) {
153 dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl;
154 pending_destroy.erase(tid);
155 }
156
157 else if (pending_noop.count(tid)) {
158 dout(7) << "rollback " << tid << " noop" << dendl;
159 pending_noop.erase(tid);
160 }
161
162 else
163 assert(0);
164
165 // bump version.
166 version++;
167 //dump();
168 }
169
170 //将传入参数bl中的项在need_to_purge的对应项擦除
171 void SnapServer::_server_update(bufferlist& bl)
172 {
173 bufferlist::iterator p = bl.begin();
174 map<int, vector<snapid_t> > purge;
175 ::decode(purge, p);
176
177 dout(7) << "_server_update purged " << purge << dendl;
178 for (map<int, vector<snapid_t> >::iterator p = purge.begin();
179 p != purge.end();
180 ++p) {
181 for (vector<snapid_t>::iterator q = p->second.begin();
182 q != p->second.end();
183 ++q)
184 need_to_purge[p->first].erase(*q);
185 if (need_to_purge[p->first].empty())
186 need_to_purge.erase(p->first);
187 }
188
189 version++;
190 }
191
// Queries are not answered here: the request is released with put() and
// nothing else is done with it.
192 void SnapServer::handle_query(MMDSTableRequest *req)
193 {
194 req->put();
195 }
196
197
198 //遍历need_to_purge,执行remove的操作
199 void SnapServer::check_osd_map(bool force)
200 {
201 if (!force && version == last_checked_osdmap) {
202 dout(10) << "check_osd_map - version unchanged" << dendl;
203 return;
204 }
205 dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl;
206
207 map<int, vector<snapid_t> > all_purge;
208 map<int, vector<snapid_t> > all_purged;
209
210 const OSDMap *osdmap = mds->objecter->get_osdmap_read();
211 for (map<int, set<snapid_t> >::iterator p = need_to_purge.begin(); //遍历need_to_purge
212 p != need_to_purge.end();
213 ++p) {
214 int id = p->first;
215 const pg_pool_t *pi = osdmap->get_pg_pool(id);
216 for (set<snapid_t>::iterator q = p->second.begin();
217 q != p->second.end();
218 ++q) {
219 if (pi->is_removed_snap(*q)) { //已经remove,存入all_purged,还没有remove则存入all_purge
220 dout(10) << " osdmap marks " << *q << " as removed" << dendl;
221 all_purged[id].push_back(*q);
222 } else {
223 all_purge[id].push_back(*q);
224 }
225 }
226 }
227 mds->objecter->put_osdmap_read();
228
229 if (!all_purged.empty()) { //已经purge,调用do_server_update更新mdlog
230 // prepare to remove from need_to_purge list
231 bufferlist bl;
232 ::encode(all_purged, bl);
233 do_server_update(bl);
234 }
235
236 if (!all_purge.empty()) { //需要purge,创建消息存储all_purge并发送给mon
237 dout(10) << "requesting removal of " << all_purge << dendl;
238 MRemoveSnaps *m = new MRemoveSnaps(all_purge);
239 mds->monc->send_mon_message(m);
240 }
241
242 last_checked_osdmap = version;
243 }
244
245
246
247 void SnapServer::dump(Formatter *f) const
248 {
249 f->open_object_section("snapserver");
250
251 f->dump_int("last_snap", last_snap.val);
252
253 f->open_array_section("pending_noop");
254 for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) {
255 f->dump_unsigned("version", *i);
256 }
257 f->close_section();
258
259 f->open_array_section("snaps");
260 for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) {
261 f->open_object_section("snap");
262 i->second.dump(f);
263 f->close_section();
264 }
265 f->close_section();
266
267 f->open_object_section("need_to_purge");
268 for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) {
269 stringstream pool_id;
270 pool_id << i->first;
271 f->open_array_section(pool_id.str().c_str());
272 for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) {
273 f->dump_unsigned("snapid", s->val);
274 }
275 f->close_section();
276 }
277 f->close_section();
278
279 f->open_array_section("pending_create");
280 for(map<version_t, SnapInfo>::const_iterator i = pending_create.begin(); i != pending_create.end(); ++i) {
281 f->open_object_section("snap");
282 f->dump_unsigned("version", i->first);
283 f->open_object_section("snapinfo");
284 i->second.dump(f);
285 f->close_section();
286 f->close_section();
287 }
288 f->close_section();
289
290 f->open_array_section("pending_destroy");
291 for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) {
292 f->open_object_section("snap");
293 f->dump_unsigned("version", i->first);
294 f->dump_unsigned("removed_snap", i->second.first);
295 f->dump_unsigned("seq", i->second.second);
296 f->close_section();
297 }
298 f->close_section();
299
300 f->close_section();
301 }
302
303 void SnapServer::generate_test_instances(list<SnapServer*>& ls)
304 {
305 list<SnapInfo*> snapinfo_instances;
306 SnapInfo::generate_test_instances(snapinfo_instances);
307 SnapInfo populated_snapinfo = *(snapinfo_instances.back());
308 for (list<SnapInfo*>::iterator i = snapinfo_instances.begin(); i != snapinfo_instances.end(); ++i) {
309 delete *i;
310 }
311
312 SnapServer *blank = new SnapServer();
313 ls.push_back(blank);
314 SnapServer *populated = new SnapServer();
315 populated->last_snap = 123;
316 populated->snaps[456] = populated_snapinfo;
317 populated->need_to_purge[2].insert(012);
318 populated->pending_create[234] = populated_snapinfo;
319 populated->pending_destroy[345].first = 567;
320 populated->pending_destroy[345].second = 768;
321 populated->pending_noop.insert(890);
322
323 ls.push_back(populated);
324
325 }
snapmapper.h中有一段注释如下,
/**
* SnapMapper
*
* Manages two mappings:
* 1) hobject_t -> {snapid} //为每个克隆对象存储快照集
* 2) snapid -> {hobject_t} //记录每个快照下包含哪些对象
*
* We accomplish this using two sets of keys:
* 1) OBJECT_PREFIX + obj.str() -> encoding of object_snaps
* 2) MAPPING_PREFIX + snapid_t + obj.str() -> encoding of pair<snapid_t, obj>
*
* The on disk strings and encodings are implemented in to_raw, to_raw_key,
* from_raw, to_object_key.
*
* The object -> {snapid} mapping is primarily included so that the
* SnapMapper state can be verified against the external PG state during
* scrub etc.
*
* The 2) mapping is arranged such that all objects in a particular
* snap will sort together, and so that all objects in a pg for a
* particular snap will group under up to 8 prefixes. //某个pg在某个snap下的所有对象最多分布在8个前缀之下,因此最多检查8个前缀即可找全这些对象
*/
snapmapper.cc:
1 #include "SnapMapper.h"
2
// Debug-output setup for this file: OSD subsystem, lines start with "snap_mapper.".
3 #define dout_subsys ceph_subsys_osd
4 #undef dout_prefix
5 #define dout_prefix *_dout << "snap_mapper."
6
7 using std::string;
8
// Key prefixes for the two kinds of entries: MAPPING_PREFIX starts the
// per-snap keys built by get_prefix()/to_raw_key(), OBJECT_PREFIX starts the
// per-object keys built by to_object_key().
9 const string SnapMapper::MAPPING_PREFIX = "MAP_";
10 const string SnapMapper::OBJECT_PREFIX = "OBJ_";
11
12 int OSDriver::get_keys(
13 const std::set<std::string> &keys,
14 std::map<std::string, bufferlist> *out)
15 {
16 return os->omap_get_values(cid, hoid, keys, out);
17 }
18
19 int OSDriver::get_next(
20 const std::string &key,
21 pair<std::string, bufferlist> *next)
22 {
23 ObjectMap::ObjectMapIterator iter =
24 os->get_omap_iterator(cid, hoid);
25 if (!iter) {
26 assert(0);
27 return -EINVAL;
28 }
29 iter->upper_bound(key);
30 if (iter->valid()) {
31 if (next)
32 *next = make_pair(iter->key(), iter->value());
33 return 0;
34 } else {
35 return -ENOENT;
36 }
37 }
38
39 struct Mapping {
40 snapid_t snap;
41 hobject_t hoid;
42 Mapping(const pair<snapid_t, hobject_t> &in)
43 : snap(in.first), hoid(in.second) {}
44 Mapping() : snap(0) {}
45 void encode(bufferlist &bl) const {
46 ENCODE_START(1, 1, bl);
47 ::encode(snap, bl);
48 ::encode(hoid, bl);
49 ENCODE_FINISH(bl);
50 }
51 void decode(bufferlist::iterator &bl) {
52 DECODE_START(1, bl);
53 ::decode(snap, bl);
54 ::decode(hoid, bl);
55 DECODE_FINISH(bl);
56 }
57 };
58 WRITE_CLASS_ENCODER(Mapping)
59
60 string SnapMapper::get_prefix(snapid_t snap)
61 {
62 char buf[100];
63 int len = snprintf(
64 buf, sizeof(buf),
65 "%.*X_", (int)(sizeof(snap)*2),
66 static_cast<unsigned>(snap));
67 return MAPPING_PREFIX + string(buf, len);
68 }
69
70 string SnapMapper::to_raw_key( //实现mapping有两种key,此为raw_key
71 const pair<snapid_t, hobject_t> &in)
72 {
73 return get_prefix(in.first) + shard_prefix + in.second.to_str();
74 }
75
76 pair<string, bufferlist> SnapMapper::to_raw( //返回键值和对应map
77 const pair<snapid_t, hobject_t> &in)
78 {
79 bufferlist bl;
80 ::encode(Mapping(in), bl);
81 return make_pair(
82 to_raw_key(in),
83 bl);
84 }
85
86 pair<snapid_t, hobject_t> SnapMapper::from_raw( //根据map返回snapid和hobject对应的snap和hoid 与to_raw相反
87 const pair<std::string, bufferlist> &image)
88 {
89 Mapping map;
90 bufferlist bl(image.second);
91 bufferlist::iterator bp(bl.begin());
92 ::decode(map, bp);
93 return make_pair(map.snap, map.hoid);
94 }
95
96 bool SnapMapper::is_mapping(const string &to_test)
97 {
98 return to_test.substr(0, MAPPING_PREFIX.size()) == MAPPING_PREFIX;
99 }
100
101 string SnapMapper::to_object_key(const hobject_t &hoid) //实现mapping有两种key,此为object_key
102 {
103 return OBJECT_PREFIX + shard_prefix + hoid.to_str();
104 }
105
// Serialize this record into `bl` as encoding version 1: `oid` first, then `snaps`.
106 void SnapMapper::object_snaps::encode(bufferlist &bl) const
107 {
108 ENCODE_START(1, 1, bl);
109 ::encode(oid, bl);
110 ::encode(snaps, bl);
111 ENCODE_FINISH(bl);
112 }
113
// Deserialize a record written by encode(): `oid` first, then `snaps`.
114 void SnapMapper::object_snaps::decode(bufferlist::iterator &bl)
115 {
116 DECODE_START(1, bl);
117 ::decode(oid, bl);
118 ::decode(snaps, bl);
119 DECODE_FINISH(bl);
120 }
121
122 int SnapMapper::get_snaps( // looks up oid's object key in backend and decodes the stored object_snaps into *out
123 const hobject_t &oid, // oid: object to look up; asserted to satisfy check(oid)
124 object_snaps *out) // out: receives the decoded record; may be NULL, in which case nothing is decoded
125 {
126 assert(check(oid));
127 set<string> keys;
128 map<string, bufferlist> got;
129 keys.insert(to_object_key(oid)); // exactly one key is requested
130 int r = backend.get_keys(keys, &got);
131 if (r < 0)
132 return r; // negative backend result is passed through unchanged
133 if (got.empty())
134 return -ENOENT; // nothing stored under this object's key
135 if (out) {
136 bufferlist::iterator bp = got.begin()->second.begin();
137 ::decode(*out, bp);
138 dout(20) << __func__ << " " << oid << " " << out->snaps << dendl;
139 assert(!out->snaps.empty()); // a stored record is expected to list at least one snap
140 } else {
141 dout(20) << __func__ << " " << oid << " (out == NULL)" << dendl;
142 }
143 return 0; // record exists (and was decoded when out was given)
144 }
145
146 void SnapMapper::clear_snaps( // removes oid's per-object key (the one to_object_key builds) through backend.remove_keys
147 const hobject_t &oid, // oid: asserted to satisfy check(oid)
148 MapCacher::Transaction<std::string, bufferlist> *t) // t: transaction handed on to the backend
149 {
150 assert(check(oid));
151 set<string> to_remove;
152 to_remove.insert(to_object_key(oid));
153 backend.remove_keys(to_remove, t); // only the per-object key is in to_remove; raw (snap, oid) keys are not touched by this function
154 }
155
156 void SnapMapper::set_snaps( // builds a one-entry map {object key -> encoded snaps record} and passes it to backend.set_keys with transaction t
157 const hobject_t &oid, // oid: asserted to satisfy check(oid); determines the key
158 const object_snaps &in, // in: record to encode as the value
159 MapCacher::Transaction<std::string, bufferlist> *t) // t: transaction handed on to the backend
160 {
161 assert(check(oid));
162 map<string, bufferlist> to_set;
163 bufferlist bl;
164 ::encode(in, bl);
165 to_set[to_object_key(oid)] = bl;
166 backend.set_keys(to_set, t);
167 }
168
169 int SnapMapper::update_snaps( // replaces oid's stored snap set with new_snaps in backend and removes the raw keys of snaps that dropped out
170 const hobject_t &oid, // oid: asserted to satisfy check(oid)
171 const set<snapid_t> &new_snaps, // new_snaps: the new snap set; when empty the call is forwarded to remove_oid
172 const set<snapid_t> *old_snaps_check, // old_snaps_check: optional; when non-NULL the stored set is asserted equal to it
173 MapCacher::Transaction<std::string, bufferlist> *t) // t: transaction handed on to the backend
174 {
175 dout(20) << __func__ << " " << oid << " " << new_snaps
176 << " was " << (old_snaps_check ? *old_snaps_check : set<snapid_t>())
177 << dendl;
178 assert(check(oid));
179 if (new_snaps.empty())
180 return remove_oid(oid, t);
181
182 object_snaps out;
183 int r = get_snaps(oid, &out); // out: the record currently stored for oid
184 if (r < 0)
185 return r; // includes -ENOENT when oid has no stored record
186 if (old_snaps_check)
187 assert(out.snaps == *old_snaps_check);
188
189 object_snaps in(oid, new_snaps);
190 set_snaps(oid, in, t); // overwrite the per-object record with the new set
191
192 set<string> to_remove;
193 for (set<snapid_t>::iterator i = out.snaps.begin();
194 i != out.snaps.end();
195 ++i) {
196 if (!new_snaps.count(*i)) { // snap was in the old set but is absent from the new one
197 to_remove.insert(to_raw_key(make_pair(*i, oid)));
198 }
199 }
200 backend.remove_keys(to_remove, t); // NOTE(review): no raw keys are added here for snaps that are in new_snaps but not in the old set -- presumably callers only ever shrink the set; confirm
201 return 0;
202 }
203
204 void SnapMapper::add_oid( // records a new object: stores its snaps record and one raw mapping per snap
205 const hobject_t &oid, // oid: asserted to satisfy check(oid) and to have no existing record
206 const set<snapid_t>& snaps, // snaps: snap ids the object belongs to
207 MapCacher::Transaction<std::string, bufferlist> *t) // t: transaction handed on to the backend
208 {
209 dout(20) << __func__ << " " << oid << " " << snaps << dendl;
210 assert(check(oid));
211 {
212 object_snaps out;
213 int r = get_snaps(oid, &out);
214 assert(r == -ENOENT); // the object must not already be recorded
215 }
216
217 object_snaps _snaps(oid, snaps);
218 set_snaps(oid, _snaps, t); // per-object record
219
220 map<string, bufferlist> to_add;
221 for (set<snapid_t>::iterator i = snaps.begin();
222 i != snaps.end();
223 ++i) {
224 to_add.insert(to_raw(make_pair(*i, oid))); // one (raw key, encoded Mapping) entry per snap
225 }
226 backend.set_keys(to_add, t); // hand to_add together with t to the backend
227 }
228
229 int SnapMapper::get_next_object_to_trim( // looks for one object that still has a mapping under snap; returns 0 on a hit, -ENOENT otherwise
230 snapid_t snap, // snap: snapshot id whose mappings are searched
231 hobject_t *hoid) // hoid: optional output; written only on a hit and only when non-NULL
232 {
233 for (set<string>::iterator i = prefixes.begin();
234 i != prefixes.end();
235 ++i) {
236 string list_after(get_prefix(snap) + *i); // key prefix for this snap followed by one entry of prefixes
237
238 pair<string, bufferlist> next;
239 int r = backend.get_next(list_after, &next); // presumably yields the first stored entry ordered after list_after -- confirm against the backend
240 if (r < 0) {
241 break; // Done: any negative result ends the whole search, and the function then returns -ENOENT rather than r
242 }
243
244 if (next.first.substr(0, list_after.size()) !=
245 list_after) {
246 continue; // Done with this prefix: the entry found does not start with list_after
247 }
248
249 assert(is_mapping(next.first));
250
251 pair<snapid_t, hobject_t> next_decoded(from_raw(next));
252 assert(next_decoded.first == snap); // decoded value must agree with the key it was found under
253 assert(check(next_decoded.second));
254
255 if (hoid)
256 *hoid = next_decoded.second;
257 return 0;
258 }
259 return -ENOENT;
260 }
261
262
263 int SnapMapper::remove_oid( // logging and checking wrapper: logs oid, asserts check(oid), then returns _remove_oid's result
264 const hobject_t &oid, // oid: object whose records should be removed
265 MapCacher::Transaction<std::string, bufferlist> *t) // t: transaction passed through to _remove_oid
266 {
267 dout(20) << __func__ << " " << oid << dendl;
268 assert(check(oid));
269 return _remove_oid(oid, t);
270 }
271
272 int SnapMapper::_remove_oid( // removes oid's per-object key and the raw key of every snap listed in its stored record
273 const hobject_t &oid, // oid: object to remove
274 MapCacher::Transaction<std::string, bufferlist> *t) // t: transaction handed on to the backend
275 {
276 object_snaps out;
277 int r = get_snaps(oid, &out); // the stored record tells which raw keys exist for this object
278 if (r < 0)
279 return r; // includes -ENOENT when oid has no stored record; nothing is removed in that case
280
281 clear_snaps(oid, t); // remove the key built by to_object_key
282
283 set<string> to_remove;
284 for (set<snapid_t>::iterator i = out.snaps.begin();
285 i != out.snaps.end();
286 ++i) {
287 to_remove.insert(to_raw_key(make_pair(*i, oid)));
288 }
289 backend.remove_keys(to_remove, t); // remove the keys built by to_raw_key
290 return 0;
291 }
292
293 int SnapMapper::get_snaps( // looks up oid's stored record and hands its snap ids to the caller's set
294 const hobject_t &oid, // oid: asserted to satisfy check(oid)
295 std::set<snapid_t> *snaps) // snaps: optional output; when non-NULL its contents are replaced by the stored snap ids
296 {
297 assert(check(oid));
298 object_snaps out;
299 int r = get_snaps(oid, &out); // resolves to the object_snaps* overload
300 if (r < 0)
301 return r;
302 if (snaps)
303 snaps->swap(out.snaps); // swap rather than copy: out is a local that is discarded on return
304 return 0;
305 }