/*
 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29
/** @addtogroup lwext4
 * @{
 */
/**
 * @file  ext4_journal.c
 * @brief Journal handle functions
 */
37
38#include <ext4_config.h>
39#include <ext4_types.h>
40#include <ext4_misc.h>
41#include <ext4_errno.h>
42#include <ext4_debug.h>
43
44#include <ext4_fs.h>
45#include <ext4_super.h>
46#include <ext4_journal.h>
47#include <ext4_blockdev.h>
48#include <ext4_crc32.h>
49#include <ext4_journal.h>
50
51#include <string.h>
52#include <stdlib.h>
53
54/**@brief Revoke entry during journal replay.*/
55struct revoke_entry {
56 /**@brief Block number not to be replayed.*/
57 ext4_fsblk_t block;
58
59 /**@brief For any transaction id smaller
60 * than trans_id, records of @block
61 * in those transactions should not
62 * be replayed.*/
63 uint32_t trans_id;
64
65 /**@brief Revoke tree node.*/
66 RB_ENTRY(revoke_entry) revoke_node;
67};
68
69/**@brief Valid journal replay information.*/
70struct recover_info {
71 /**@brief Starting transaction id.*/
72 uint32_t start_trans_id;
73
74 /**@brief Ending transaction id.*/
75 uint32_t last_trans_id;
76
77 /**@brief Used as internal argument.*/
78 uint32_t this_trans_id;
79
80 /**@brief No of transactions went through.*/
81 uint32_t trans_cnt;
82
83 /**@brief RB-Tree storing revoke entries.*/
84 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
85};
86
/**@brief Argument bundle handed to the per-tag replay callback.*/
struct replay_arg {
	/**@brief Replay state (revoke tree, transaction range).*/
	struct recover_info *info;

	/**@brief Journal block index currently being walked; the
	 *        callback advances it through this pointer.*/
	uint32_t *this_block;

	/**@brief Id of the transaction the tags belong to.*/
	uint32_t this_trans_id;
};
98
/* Keep a journal block index inside the circular log: once @var has
 * reached the superblock's maxlen it is pulled back by
 * (maxlen - first). @var must be a modifiable lvalue. */
#define wrap(sb, var)							\
do {									\
	uint32_t jbd_wrap_len = jbd_get32((sb), maxlen);		\
	if ((var) >= jbd_wrap_len)					\
		(var) -= jbd_wrap_len - jbd_get32((sb), first);		\
} while (0)
105
/**@brief Signed distance between two transaction ids.
 * @param x first transaction id
 * @param y second transaction id
 * @return x - y computed modulo 2^32 and reinterpreted as a signed
 *         value, so ids that wrapped around still compare sensibly.*/
static inline int32_t
trans_id_diff(uint32_t x, uint32_t y)
{
	return (int32_t)(x - y);
}
112
113static int
114jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
115{
116 if (a->block > b->block)
117 return 1;
118 else if (a->block < b->block)
119 return -1;
120 return 0;
121}
122
123static int
124jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
125{
126 if (a->lba > b->lba)
127 return 1;
128 else if (a->lba < b->lba)
129 return -1;
130 return 0;
131}
132
133static int
134jbd_revoke_rec_cmp(struct jbd_revoke_rec *a, struct jbd_revoke_rec *b)
135{
136 if (a->lba > b->lba)
137 return 1;
138 else if (a->lba < b->lba)
139 return -1;
140 return 0;
141}
142
143RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
144 jbd_revoke_entry_cmp, static inline)
145RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
146 jbd_block_rec_cmp, static inline)
147RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
148 jbd_revoke_rec_cmp, static inline)
149
/* Allocation helpers for revoke tree nodes: one struct revoke_entry
 * per jbd_alloc_revoke_entry() call, released with
 * jbd_free_revoke_entry(). */
#define jbd_alloc_revoke_entry()				\
	ext4_calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr)				\
	ext4_free(addr)
152
153static int jbd_has_csum(struct jbd_sb *jbd_sb)
154{
155 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
156 return 2;
157
158 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
159 return 3;
160
161 return 0;
162}
163
#if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of the jbd superblock.
 * @param jbd_sb jbd superblock
 * @return the checksum, or 0 when the journal has no checksum feature.
 *
 * The stored checksum field is cleared while the checksum is computed
 * and put back afterwards, so the superblock is left unchanged.*/
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
	uint32_t saved, crc;

	if (!jbd_has_csum(jbd_sb))
		return 0;

	saved = jbd_sb->checksum;
	jbd_set32(jbd_sb, checksum, 0);
	/* The checksum covers the whole superblock. */
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
	jbd_sb->checksum = saved;

	return crc;
}
#else
/* Checksums compiled out: every superblock checksum is 0. */
#define jbd_sb_csum(...) 0
#endif
182
183static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
184{
185 if (!jbd_has_csum(jbd_sb))
186 return;
187
188 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
189}
190
#if CONFIG_META_CSUM_ENABLE
/**@brief Check the checksum stored in the jbd superblock.
 * @param jbd_sb jbd superblock
 * @return true when the journal has no checksum feature or when the
 *         stored checksum matches the computed one.*/
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
	uint32_t computed, stored;

	if (!jbd_has_csum(jbd_sb))
		return true;

	computed = jbd_sb_csum(jbd_sb);
	stored = jbd_get32(jbd_sb, checksum);
	return computed == stored;
}
#else
/* Checksums compiled out: every superblock is accepted. */
#define jbd_verify_sb_csum(...) true
#endif
203
#if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of a journal metadata block.
 * @param jbd_fs jbd filesystem
 * @param bhdr   start of the metadata block (one journal block long)
 * @return the checksum, or 0 when the journal has no checksum feature.
 *
 * The checksum is seeded with the journal UUID and then covers the
 * whole block; the checksum slot in the block tail is cleared for the
 * computation and put back afterwards.*/
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
			      struct jbd_bhdr *bhdr)
{
	struct jbd_sb *sb = &jbd_fs->sb;
	struct jbd_block_tail *tail;
	uint32_t block_size, saved, crc;

	if (!jbd_has_csum(sb))
		return 0;

	block_size = jbd_get32(sb, blocksize);
	/* The tail occupies the last bytes of the block. */
	tail = (struct jbd_block_tail *)((char *)bhdr + block_size -
					 sizeof(struct jbd_block_tail));
	saved = tail->checksum;
	tail->checksum = 0;

	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
	crc = ext4_crc32c(crc, bhdr, block_size);

	tail->checksum = saved;
	return crc;
}
#else
/* Checksums compiled out: every metadata checksum is 0. */
#define jbd_meta_csum(...) 0
#endif
231
232static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
233 struct jbd_bhdr *bhdr)
234{
235 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
236 struct jbd_block_tail *tail = (struct jbd_block_tail *)
237 ((char *)bhdr + block_size -
238 sizeof(struct jbd_block_tail));
239 if (!jbd_has_csum(jbd_sb: &jbd_fs->sb))
240 return;
241
242 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
243}
244
#if CONFIG_META_CSUM_ENABLE
/**@brief Check the checksum stored in the tail of a metadata block.
 * @param jbd_fs jbd filesystem
 * @param bhdr   start of the metadata block (one journal block long)
 * @return true when the journal has no checksum feature or when the
 *         stored checksum matches the computed one.*/
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
		     struct jbd_bhdr *bhdr)
{
	uint32_t block_size;
	struct jbd_block_tail *tail;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	block_size = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + block_size -
					 sizeof(struct jbd_block_tail));

	return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
/* Checksums compiled out: every metadata block is accepted. */
#define jbd_verify_meta_csum(...) true
#endif
262
#if CONFIG_META_CSUM_ENABLE
/**@brief Compute the crc32c checksum of a commit block.
 * @param jbd_fs jbd filesystem
 * @param header start of the commit block (one journal block long)
 * @return the checksum, or 0 when the journal has no checksum feature.
 *
 * The checksum is seeded with the journal UUID and then covers the
 * whole block; chksum_type, chksum_size and chksum[0] are cleared for
 * the computation and put back afterwards.*/
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
				struct jbd_commit_header *header)
{
	struct jbd_sb *sb = &jbd_fs->sb;
	uint8_t saved_type, saved_size;
	uint32_t saved_csum, block_size, crc;

	if (!jbd_has_csum(sb))
		return 0;

	saved_type = header->chksum_type;
	saved_size = header->chksum_size;
	saved_csum = header->chksum[0];
	block_size = jbd_get32(sb, blocksize);

	header->chksum_type = 0;
	header->chksum_size = 0;
	header->chksum[0] = 0;

	crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid, sizeof(sb->uuid));
	crc = ext4_crc32c(crc, header, block_size);

	header->chksum_type = saved_type;
	header->chksum_size = saved_size;
	header->chksum[0] = saved_csum;

	return crc;
}
#else
/* Checksums compiled out: every commit checksum is 0. */
#define jbd_commit_csum(...) 0
#endif
294
295static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
296 struct jbd_commit_header *header)
297{
298 if (!jbd_has_csum(jbd_sb: &jbd_fs->sb))
299 return;
300
301 header->chksum_type = 0;
302 header->chksum_size = 0;
303 header->chksum[0] = jbd_commit_csum(jbd_fs, header);
304}
305
#if CONFIG_META_CSUM_ENABLE
/**@brief Check the checksum stored in a commit block.
 * @param jbd_fs jbd filesystem
 * @param header start of the commit block (one journal block long)
 * @return true when the journal has no checksum feature or when
 *         chksum[0] equals the big-endian computed checksum.*/
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
				   struct jbd_commit_header *header)
{
	uint32_t expected;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	expected = to_be32(jbd_commit_csum(jbd_fs, header));
	return header->chksum[0] == expected;
}
#else
/* Checksums compiled out: every commit block is accepted. */
#define jbd_verify_commit_csum(...) true
#endif
319
#if CONFIG_META_CSUM_ENABLE
/**@brief Compute the checksum of a logged data block.
 * @param jbd_fs   jbd filesystem
 * @param buf      block content (one journal block long)
 * @param csum     running checksum; only used as the seed of the
 *                 crc32 branch (JBD_FEATURE_COMPAT_CHECKSUM)
 * @param sequence transaction id mixed into the crc32c branch
 * @return the checksum, or 0 when neither branch applies.*/
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
			       uint32_t csum,
			       uint32_t sequence)
{
	struct jbd_sb *sb = &jbd_fs->sb;
	uint32_t block_size, crc;

	if (jbd_has_csum(sb)) {
		block_size = jbd_get32(sb, blocksize);
		/* crc32c over the journal UUID, the sequence number
		 * and finally the whole block. */
		crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
				  sizeof(sb->uuid));
		crc = ext4_crc32c(crc, &sequence, sizeof(uint32_t));
		return ext4_crc32c(crc, buf, block_size);
	}

	/* NOTE(review): a COMPAT flag is tested through the INCOMPAT
	 * feature macro here - confirm this is intended. */
	if (JBD_HAS_INCOMPAT_FEATURE(sb, JBD_FEATURE_COMPAT_CHECKSUM)) {
		block_size = jbd_get32(sb, blocksize);
		/* Plain crc32 over the whole block, seeded with @csum. */
		return ext4_crc32(csum, buf, block_size);
	}

	return 0;
}
#else
/* Checksums compiled out: every block checksum is 0. */
#define jbd_block_csum(...) 0
#endif
354
355static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
356 uint32_t checksum)
357{
358 int ver = jbd_has_csum(jbd_sb: &jbd_fs->sb);
359 if (!ver)
360 return;
361
362 if (ver == 2) {
363 struct jbd_block_tag *tag = __tag;
364 tag->checksum = (uint16_t)to_be32(checksum);
365 } else {
366 struct jbd_block_tag3 *tag = __tag;
367 tag->checksum = to_be32(checksum);
368 }
369}
370
371/**@brief Write jbd superblock to disk.
372 * @param jbd_fs jbd filesystem
373 * @param s jbd superblock
374 * @return standard error code*/
375static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
376{
377 int rc;
378 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
379 uint64_t offset;
380 ext4_fsblk_t fblock;
381 rc = jbd_inode_bmap(jbd_fs, iblock: 0, fblock: &fblock);
382 if (rc != EOK)
383 return rc;
384
385 jbd_sb_csum_set(jbd_sb: s);
386 offset = fblock * ext4_sb_get_block_size(s: &fs->sb);
387 return ext4_block_writebytes(bdev: fs->bdev, off: offset, buf: s,
388 EXT4_SUPERBLOCK_SIZE);
389}
390
391/**@brief Read jbd superblock from disk.
392 * @param jbd_fs jbd filesystem
393 * @param s jbd superblock
394 * @return standard error code*/
395static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
396{
397 int rc;
398 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
399 uint64_t offset;
400 ext4_fsblk_t fblock;
401 rc = jbd_inode_bmap(jbd_fs, iblock: 0, fblock: &fblock);
402 if (rc != EOK)
403 return rc;
404
405 offset = fblock * ext4_sb_get_block_size(s: &fs->sb);
406 return ext4_block_readbytes(bdev: fs->bdev, off: offset, buf: s,
407 EXT4_SUPERBLOCK_SIZE);
408}
409
410/**@brief Verify jbd superblock.
411 * @param sb jbd superblock
412 * @return true if jbd superblock is valid */
413static bool jbd_verify_sb(struct jbd_sb *sb)
414{
415 struct jbd_bhdr *header = &sb->header;
416 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
417 return false;
418
419 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
420 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
421 return false;
422
423 return jbd_verify_sb_csum(jbd_sb: sb);
424}
425
426/**@brief Write back dirty jbd superblock to disk.
427 * @param jbd_fs jbd filesystem
428 * @return standard error code*/
429static int jbd_write_sb(struct jbd_fs *jbd_fs)
430{
431 int rc = EOK;
432 if (jbd_fs->dirty) {
433 rc = jbd_sb_write(jbd_fs, s: &jbd_fs->sb);
434 if (rc != EOK)
435 return rc;
436
437 jbd_fs->dirty = false;
438 }
439 return rc;
440}
441
442/**@brief Get reference to jbd filesystem.
443 * @param fs Filesystem to load journal of
444 * @param jbd_fs jbd filesystem
445 * @return standard error code*/
446int jbd_get_fs(struct ext4_fs *fs,
447 struct jbd_fs *jbd_fs)
448{
449 int rc;
450 uint32_t journal_ino;
451
452 memset(dest: jbd_fs, c: 0, size: sizeof(struct jbd_fs));
453 /* See if there is journal inode on this filesystem.*/
454 /* FIXME: detection on existance ofbkejournal bdev is
455 * missing.*/
456 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
457
458 rc = ext4_fs_get_inode_ref(fs,
459 index: journal_ino,
460 ref: &jbd_fs->inode_ref);
461 if (rc != EOK)
462 return rc;
463
464 rc = jbd_sb_read(jbd_fs, s: &jbd_fs->sb);
465 if (rc != EOK)
466 goto Error;
467
468 if (!jbd_verify_sb(sb: &jbd_fs->sb)) {
469 rc = EIO;
470 goto Error;
471 }
472
473 if (rc == EOK)
474 jbd_fs->bdev = fs->bdev;
475
476 return rc;
477Error:
478 ext4_fs_put_inode_ref(ref: &jbd_fs->inode_ref);
479 memset(dest: jbd_fs, c: 0, size: sizeof(struct jbd_fs));
480
481 return rc;
482}
483
484/**@brief Put reference of jbd filesystem.
485 * @param jbd_fs jbd filesystem
486 * @return standard error code*/
487int jbd_put_fs(struct jbd_fs *jbd_fs)
488{
489 int rc = EOK;
490 rc = jbd_write_sb(jbd_fs);
491
492 ext4_fs_put_inode_ref(ref: &jbd_fs->inode_ref);
493 return rc;
494}
495
496/**@brief Data block lookup helper.
497 * @param jbd_fs jbd filesystem
498 * @param iblock block index
499 * @param fblock logical block address
500 * @return standard error code*/
501int jbd_inode_bmap(struct jbd_fs *jbd_fs,
502 ext4_lblk_t iblock,
503 ext4_fsblk_t *fblock)
504{
505 int rc = ext4_fs_get_inode_dblk_idx(
506 inode_ref: &jbd_fs->inode_ref,
507 iblock,
508 fblock,
509 support_unwritten: false);
510 return rc;
511}
512
513/**@brief jbd block get function (through cache).
514 * @param jbd_fs jbd filesystem
515 * @param block block descriptor
516 * @param fblock jbd logical block address
517 * @return standard error code*/
518static int jbd_block_get(struct jbd_fs *jbd_fs,
519 struct ext4_block *block,
520 ext4_fsblk_t fblock)
521{
522 /* TODO: journal device. */
523 int rc;
524 struct ext4_blockdev *bdev = jbd_fs->bdev;
525 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
526
527 /* Lookup the logical block address of
528 * fblock.*/
529 rc = jbd_inode_bmap(jbd_fs, iblock,
530 fblock: &fblock);
531 if (rc != EOK)
532 return rc;
533
534 rc = ext4_block_get(bdev, b: block, lba: fblock);
535
536 /* If succeeded, mark buffer as BC_FLUSH to indicate
537 * that data should be written to disk immediately.*/
538 if (rc == EOK) {
539 ext4_bcache_set_flag(block->buf, BC_FLUSH);
540 /* As we don't want to occupy too much space
541 * in block cache, we set this buffer BC_TMP.*/
542 ext4_bcache_set_flag(block->buf, BC_TMP);
543 }
544
545 return rc;
546}
547
548/**@brief jbd block get function (through cache, don't read).
549 * @param jbd_fs jbd filesystem
550 * @param block block descriptor
551 * @param fblock jbd logical block address
552 * @return standard error code*/
553static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
554 struct ext4_block *block,
555 ext4_fsblk_t fblock)
556{
557 /* TODO: journal device. */
558 int rc;
559 struct ext4_blockdev *bdev = jbd_fs->bdev;
560 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
561 rc = jbd_inode_bmap(jbd_fs, iblock,
562 fblock: &fblock);
563 if (rc != EOK)
564 return rc;
565
566 rc = ext4_block_get_noread(bdev, b: block, lba: fblock);
567 if (rc == EOK)
568 ext4_bcache_set_flag(block->buf, BC_FLUSH);
569
570 return rc;
571}
572
573/**@brief jbd block set procedure (through cache).
574 * @param jbd_fs jbd filesystem
575 * @param block block descriptor
576 * @return standard error code*/
577static int jbd_block_set(struct jbd_fs *jbd_fs,
578 struct ext4_block *block)
579{
580 struct ext4_blockdev *bdev = jbd_fs->bdev;
581 return ext4_block_set(bdev, b: block);
582}
583
584/**@brief helper functions to calculate
585 * block tag size, not including UUID part.
586 * @param jbd_fs jbd filesystem
587 * @return tag size in bytes*/
588static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
589{
590 int size;
591
592 /* It is very easy to deal with the case which
593 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
594 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
595 JBD_FEATURE_INCOMPAT_CSUM_V3))
596 return sizeof(struct jbd_block_tag3);
597
598 size = sizeof(struct jbd_block_tag);
599
600 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
601 * add 2 bytes to size.*/
602 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
603 JBD_FEATURE_INCOMPAT_CSUM_V2))
604 size += sizeof(uint16_t);
605
606 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
607 JBD_FEATURE_INCOMPAT_64BIT))
608 return size;
609
610 /* If block number is 4 bytes in size,
611 * minus 4 bytes from size */
612 return size - sizeof(uint32_t);
613}
614
615/**@brief Tag information. */
616struct tag_info {
617 /**@brief Tag size in bytes, including UUID part.*/
618 int tag_bytes;
619
620 /**@brief block number stored in this tag.*/
621 ext4_fsblk_t block;
622
623 /**@brief Is the first 4 bytes of block equals to
624 * JBD_MAGIC_NUMBER? */
625 bool is_escape;
626
627 /**@brief whether UUID part exists or not.*/
628 bool uuid_exist;
629
630 /**@brief UUID content if UUID part exists.*/
631 uint8_t uuid[UUID_SIZE];
632
633 /**@brief Is this the last tag? */
634 bool last_tag;
635
636 /**@brief crc32c checksum. */
637 uint32_t checksum;
638};
639
640/**@brief Extract information from a block tag.
641 * @param __tag pointer to the block tag
642 * @param tag_bytes block tag size of this jbd filesystem
643 * @param remain_buf_size size in buffer containing the block tag
644 * @param tag_info information of this tag.
645 * @return EOK when succeed, otherwise return EINVAL.*/
646static int
647jbd_extract_block_tag(struct jbd_fs *jbd_fs,
648 void *__tag,
649 int tag_bytes,
650 int32_t remain_buf_size,
651 struct tag_info *tag_info)
652{
653 char *uuid_start;
654 tag_info->tag_bytes = tag_bytes;
655 tag_info->uuid_exist = false;
656 tag_info->last_tag = false;
657 tag_info->is_escape = false;
658
659 /* See whether it is possible to hold a valid block tag.*/
660 if (remain_buf_size - tag_bytes < 0)
661 return EINVAL;
662
663 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
664 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
665 struct jbd_block_tag3 *tag = __tag;
666 tag_info->block = jbd_get32(tag, blocknr);
667 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
668 JBD_FEATURE_INCOMPAT_64BIT))
669 tag_info->block |=
670 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
671
672 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
673 tag_info->is_escape = true;
674
675 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
676 /* See whether it is possible to hold UUID part.*/
677 if (remain_buf_size - tag_bytes < UUID_SIZE)
678 return EINVAL;
679
680 uuid_start = (char *)tag + tag_bytes;
681 tag_info->uuid_exist = true;
682 tag_info->tag_bytes += UUID_SIZE;
683 memcpy(dest: tag_info->uuid, src: uuid_start, UUID_SIZE);
684 }
685
686 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
687 tag_info->last_tag = true;
688
689 } else {
690 struct jbd_block_tag *tag = __tag;
691 tag_info->block = jbd_get32(tag, blocknr);
692 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
693 JBD_FEATURE_INCOMPAT_64BIT))
694 tag_info->block |=
695 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
696
697 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
698 tag_info->is_escape = true;
699
700 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
701 /* See whether it is possible to hold UUID part.*/
702 if (remain_buf_size - tag_bytes < UUID_SIZE)
703 return EINVAL;
704
705 uuid_start = (char *)tag + tag_bytes;
706 tag_info->uuid_exist = true;
707 tag_info->tag_bytes += UUID_SIZE;
708 memcpy(dest: tag_info->uuid, src: uuid_start, UUID_SIZE);
709 }
710
711 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
712 tag_info->last_tag = true;
713
714 }
715 return EOK;
716}
717
718/**@brief Write information to a block tag.
719 * @param __tag pointer to the block tag
720 * @param remain_buf_size size in buffer containing the block tag
721 * @param tag_info information of this tag.
722 * @return EOK when succeed, otherwise return EINVAL.*/
723static int
724jbd_write_block_tag(struct jbd_fs *jbd_fs,
725 void *__tag,
726 int32_t remain_buf_size,
727 struct tag_info *tag_info)
728{
729 char *uuid_start;
730 int tag_bytes = jbd_tag_bytes(jbd_fs);
731
732 tag_info->tag_bytes = tag_bytes;
733
734 /* See whether it is possible to hold a valid block tag.*/
735 if (remain_buf_size - tag_bytes < 0)
736 return EINVAL;
737
738 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
739 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
740 struct jbd_block_tag3 *tag = __tag;
741 memset(dest: tag, c: 0, size: sizeof(struct jbd_block_tag3));
742 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
743 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
744 JBD_FEATURE_INCOMPAT_64BIT))
745 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
746
747 if (tag_info->uuid_exist) {
748 /* See whether it is possible to hold UUID part.*/
749 if (remain_buf_size - tag_bytes < UUID_SIZE)
750 return EINVAL;
751
752 uuid_start = (char *)tag + tag_bytes;
753 tag_info->tag_bytes += UUID_SIZE;
754 memcpy(dest: uuid_start, src: tag_info->uuid, UUID_SIZE);
755 } else
756 jbd_set32(tag, flags,
757 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
758
759 jbd_block_tag_csum_set(jbd_fs, __tag, checksum: tag_info->checksum);
760
761 if (tag_info->last_tag)
762 jbd_set32(tag, flags,
763 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
764
765 if (tag_info->is_escape)
766 jbd_set32(tag, flags,
767 jbd_get32(tag, flags) | JBD_FLAG_ESCAPE);
768
769 } else {
770 struct jbd_block_tag *tag = __tag;
771 memset(dest: tag, c: 0, size: sizeof(struct jbd_block_tag));
772 jbd_set32(tag, blocknr, (uint32_t)tag_info->block);
773 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
774 JBD_FEATURE_INCOMPAT_64BIT))
775 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
776
777 if (tag_info->uuid_exist) {
778 /* See whether it is possible to hold UUID part.*/
779 if (remain_buf_size - tag_bytes < UUID_SIZE)
780 return EINVAL;
781
782 uuid_start = (char *)tag + tag_bytes;
783 tag_info->tag_bytes += UUID_SIZE;
784 memcpy(dest: uuid_start, src: tag_info->uuid, UUID_SIZE);
785 } else
786 jbd_set16(tag, flags,
787 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
788
789 jbd_block_tag_csum_set(jbd_fs, __tag, checksum: tag_info->checksum);
790
791 if (tag_info->last_tag)
792 jbd_set16(tag, flags,
793 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
794
795
796 if (tag_info->is_escape)
797 jbd_set16(tag, flags,
798 jbd_get16(tag, flags) | JBD_FLAG_ESCAPE);
799
800 }
801 return EOK;
802}
803
804/**@brief Iterate all block tags in a block.
805 * @param jbd_fs jbd filesystem
806 * @param __tag_start pointer to the block
807 * @param tag_tbl_size size of the block
808 * @param func callback routine to indicate that
809 * a block tag is found
810 * @param arg additional argument to be passed to func */
811static void
812jbd_iterate_block_table(struct jbd_fs *jbd_fs,
813 void *__tag_start,
814 int32_t tag_tbl_size,
815 void (*func)(struct jbd_fs * jbd_fs,
816 struct tag_info *tag_info,
817 void *arg),
818 void *arg)
819{
820 char *tag_start, *tag_ptr;
821 int tag_bytes = jbd_tag_bytes(jbd_fs);
822 tag_start = __tag_start;
823 tag_ptr = tag_start;
824
825 /* Cut off the size of block tail storing checksum. */
826 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
827 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
828 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
829 JBD_FEATURE_INCOMPAT_CSUM_V3))
830 tag_tbl_size -= sizeof(struct jbd_block_tail);
831
832 while (tag_tbl_size) {
833 struct tag_info tag_info;
834 int rc = jbd_extract_block_tag(jbd_fs,
835 tag: tag_ptr,
836 tag_bytes,
837 remain_buf_size: tag_tbl_size,
838 tag_info: &tag_info);
839 if (rc != EOK)
840 break;
841
842 if (func)
843 func(jbd_fs, &tag_info, arg);
844
845 /* Stop the iteration when we reach the last tag. */
846 if (tag_info.last_tag)
847 break;
848
849 tag_ptr += tag_info.tag_bytes;
850 tag_tbl_size -= tag_info.tag_bytes;
851 }
852}
853
854static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
855 struct tag_info *tag_info,
856 void *arg)
857{
858 (void) tag_info;
859 uint32_t *iblock = arg;
860 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", tag_info->block);
861 (*iblock)++;
862 wrap(&jbd_fs->sb, *iblock);
863 (void)jbd_fs;
864 return;
865}
866
867static struct revoke_entry *
868jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
869{
870 struct revoke_entry tmp = {
871 .block = block
872 };
873
874 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
875}
876
877/**@brief Replay a block in a transaction.
878 * @param jbd_fs jbd filesystem
879 * @param tag_info tag_info of the logged block.*/
880static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
881 struct tag_info *tag_info,
882 void *__arg)
883{
884 int r;
885 struct replay_arg *arg = __arg;
886 struct recover_info *info = arg->info;
887 uint32_t *this_block = arg->this_block;
888 struct revoke_entry *revoke_entry;
889 struct ext4_block journal_block, ext4_block;
890 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
891
892 (*this_block)++;
893 wrap(&jbd_fs->sb, *this_block);
894
895 /* We replay this block only if the current transaction id
896 * is equal or greater than that in revoke entry.*/
897 revoke_entry = jbd_revoke_entry_lookup(info, block: tag_info->block);
898 if (revoke_entry &&
899 trans_id_diff(x: arg->this_trans_id, y: revoke_entry->trans_id) <= 0)
900 return;
901
902 ext4_dbg(DEBUG_JBD,
903 "Replaying block in block_tag: %" PRIu64 "\n",
904 tag_info->block);
905
906 r = jbd_block_get(jbd_fs, block: &journal_block, fblock: *this_block);
907 if (r != EOK)
908 return;
909
910 /* We need special treatment for ext4 superblock. */
911 if (tag_info->block) {
912 r = ext4_block_get_noread(bdev: fs->bdev, b: &ext4_block, lba: tag_info->block);
913 if (r != EOK) {
914 jbd_block_set(jbd_fs, block: &journal_block);
915 return;
916 }
917
918 memcpy(dest: ext4_block.data,
919 src: journal_block.data,
920 jbd_get32(&jbd_fs->sb, blocksize));
921
922 if (tag_info->is_escape)
923 ((struct jbd_bhdr *)ext4_block.data)->magic =
924 to_be32(JBD_MAGIC_NUMBER);
925
926 ext4_bcache_set_dirty(buf: ext4_block.buf);
927 ext4_block_set(bdev: fs->bdev, b: &ext4_block);
928 } else {
929 uint16_t mount_count, state;
930 mount_count = ext4_get16(&fs->sb, mount_count);
931 state = ext4_get16(&fs->sb, state);
932
933 memcpy(dest: &fs->sb,
934 src: journal_block.data + EXT4_SUPERBLOCK_OFFSET,
935 EXT4_SUPERBLOCK_SIZE);
936
937 /* Mark system as mounted */
938 ext4_set16(&fs->sb, state, state);
939 r = ext4_sb_write(bdev: fs->bdev, s: &fs->sb);
940 if (r != EOK)
941 return;
942
943 /*Update mount count*/
944 ext4_set16(&fs->sb, mount_count, mount_count);
945 }
946
947 jbd_block_set(jbd_fs, block: &journal_block);
948
949 return;
950}
951
952/**@brief Add block address to revoke tree, along with
953 * its transaction id.
954 * @param info journal replay info
955 * @param block block address to be replayed.*/
956static void jbd_add_revoke_block_tags(struct recover_info *info,
957 ext4_fsblk_t block)
958{
959 struct revoke_entry *revoke_entry;
960
961 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
962 /* If the revoke entry with respect to the block address
963 * exists already, update its transaction id.*/
964 revoke_entry = jbd_revoke_entry_lookup(info, block);
965 if (revoke_entry) {
966 revoke_entry->trans_id = info->this_trans_id;
967 return;
968 }
969
970 revoke_entry = jbd_alloc_revoke_entry();
971 ext4_assert(revoke_entry);
972 revoke_entry->block = block;
973 revoke_entry->trans_id = info->this_trans_id;
974 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
975
976 return;
977}
978
979static void jbd_destroy_revoke_tree(struct recover_info *info)
980{
981 while (!RB_EMPTY(&info->revoke_root)) {
982 struct revoke_entry *revoke_entry =
983 RB_MIN(jbd_revoke, &info->revoke_root);
984 ext4_assert(revoke_entry);
985 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
986 jbd_free_revoke_entry(revoke_entry);
987 }
988}
989
990
/* Passes understood by jbd_iterate_log(). */
#define ACTION_SCAN	0	/* scan the journal */
#define ACTION_REVOKE	1	/* revoke pass */
#define ACTION_RECOVER	2	/* replay the logged blocks */
994
995/**@brief Add entries in a revoke block to revoke tree.
996 * @param jbd_fs jbd filesystem
997 * @param header revoke block header
998 * @param info journal replay info*/
999static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
1000 struct jbd_bhdr *header,
1001 struct recover_info *info)
1002{
1003 char *blocks_entry;
1004 struct jbd_revoke_header *revoke_hdr =
1005 (struct jbd_revoke_header *)header;
1006 uint32_t i, nr_entries, record_len = 4;
1007
1008 /* If we are working on a 64bit jbd filesystem, */
1009 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
1010 JBD_FEATURE_INCOMPAT_64BIT))
1011 record_len = 8;
1012
1013 nr_entries = (jbd_get32(revoke_hdr, count) -
1014 sizeof(struct jbd_revoke_header)) /
1015 record_len;
1016
1017 blocks_entry = (char *)(revoke_hdr + 1);
1018
1019 for (i = 0;i < nr_entries;i++) {
1020 if (record_len == 8) {
1021 uint64_t *blocks =
1022 (uint64_t *)blocks_entry;
1023 jbd_add_revoke_block_tags(info, to_be64(*blocks));
1024 } else {
1025 uint32_t *blocks =
1026 (uint32_t *)blocks_entry;
1027 jbd_add_revoke_block_tags(info, to_be32(*blocks));
1028 }
1029 blocks_entry += record_len;
1030 }
1031}
1032
1033static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
1034 struct jbd_bhdr *header,
1035 uint32_t *iblock)
1036{
1037 jbd_iterate_block_table(jbd_fs,
1038 tag_start: header + 1,
1039 jbd_get32(&jbd_fs->sb, blocksize) -
1040 sizeof(struct jbd_bhdr),
1041 func: jbd_display_block_tags,
1042 arg: iblock);
1043}
1044
1045static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1046 struct jbd_bhdr *header,
1047 struct replay_arg *arg)
1048{
1049 jbd_iterate_block_table(jbd_fs,
1050 tag_start: header + 1,
1051 jbd_get32(&jbd_fs->sb, blocksize) -
1052 sizeof(struct jbd_bhdr),
1053 func: jbd_replay_block_tags,
1054 arg);
1055}
1056
1057/**@brief The core routine of journal replay.
1058 * @param jbd_fs jbd filesystem
1059 * @param info journal replay info
1060 * @param action action needed to be taken
1061 * @return standard error code*/
1062static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1063 struct recover_info *info,
1064 int action)
1065{
1066 int r = EOK;
1067 bool log_end = false;
1068 struct jbd_sb *sb = &jbd_fs->sb;
1069 uint32_t start_trans_id, this_trans_id;
1070 uint32_t start_block, this_block;
1071
1072 /* We start iterating valid blocks in the whole journal.*/
1073 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1074 start_block = this_block = jbd_get32(sb, start);
1075 if (action == ACTION_SCAN)
1076 info->trans_cnt = 0;
1077 else if (!info->trans_cnt)
1078 log_end = true;
1079
1080 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1081 start_trans_id);
1082
1083 while (!log_end) {
1084 struct ext4_block block;
1085 struct jbd_bhdr *header;
1086 /* If we are not scanning for the last
1087 * valid transaction in the journal,
1088 * we will stop when we reach the end of
1089 * the journal.*/
1090 if (action != ACTION_SCAN)
1091 if (trans_id_diff(x: this_trans_id, y: info->last_trans_id) > 0) {
1092 log_end = true;
1093 continue;
1094 }
1095
1096 r = jbd_block_get(jbd_fs, block: &block, fblock: this_block);
1097 if (r != EOK)
1098 break;
1099
1100 header = (struct jbd_bhdr *)block.data;
1101 /* This block does not have a valid magic number,
1102 * so we have reached the end of the journal.*/
1103 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1104 jbd_block_set(jbd_fs, block: &block);
1105 log_end = true;
1106 continue;
1107 }
1108
1109 /* If the transaction id we found is not expected,
1110 * we may have reached the end of the journal.
1111 *
1112 * If we are not scanning the journal, something
1113 * bad might have taken place. :-( */
1114 if (jbd_get32(header, sequence) != this_trans_id) {
1115 if (action != ACTION_SCAN)
1116 r = EIO;
1117
1118 jbd_block_set(jbd_fs, block: &block);
1119 log_end = true;
1120 continue;
1121 }
1122
1123 switch (jbd_get32(header, blocktype)) {
1124 case JBD_DESCRIPTOR_BLOCK:
1125 if (!jbd_verify_meta_csum(jbd_fs, bhdr: header)) {
1126 ext4_dbg(DEBUG_JBD,
1127 DBG_WARN "Descriptor block checksum failed."
1128 "Journal block: %" PRIu32"\n",
1129 this_block);
1130 log_end = true;
1131 break;
1132 }
1133 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1134 "trans_id: %" PRIu32"\n",
1135 this_block, this_trans_id);
1136 if (action == ACTION_RECOVER) {
1137 struct replay_arg replay_arg;
1138 replay_arg.info = info;
1139 replay_arg.this_block = &this_block;
1140 replay_arg.this_trans_id = this_trans_id;
1141
1142 jbd_replay_descriptor_block(jbd_fs,
1143 header, arg: &replay_arg);
1144 } else
1145 jbd_debug_descriptor_block(jbd_fs,
1146 header, iblock: &this_block);
1147
1148 break;
1149 case JBD_COMMIT_BLOCK:
1150 if (!jbd_verify_commit_csum(jbd_fs,
1151 header: (struct jbd_commit_header *)header)) {
1152 ext4_dbg(DEBUG_JBD,
1153 DBG_WARN "Commit block checksum failed."
1154 "Journal block: %" PRIu32"\n",
1155 this_block);
1156 log_end = true;
1157 break;
1158 }
1159 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1160 "trans_id: %" PRIu32"\n",
1161 this_block, this_trans_id);
1162 /*
1163 * This is the end of a transaction,
1164 * we may now proceed to the next transaction.
1165 */
1166 this_trans_id++;
1167 if (action == ACTION_SCAN)
1168 info->trans_cnt++;
1169 break;
1170 case JBD_REVOKE_BLOCK:
1171 if (!jbd_verify_meta_csum(jbd_fs, bhdr: header)) {
1172 ext4_dbg(DEBUG_JBD,
1173 DBG_WARN "Revoke block checksum failed."
1174 "Journal block: %" PRIu32"\n",
1175 this_block);
1176 log_end = true;
1177 break;
1178 }
1179 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1180 "trans_id: %" PRIu32"\n",
1181 this_block, this_trans_id);
1182 if (action == ACTION_REVOKE) {
1183 info->this_trans_id = this_trans_id;
1184 jbd_build_revoke_tree(jbd_fs,
1185 header, info);
1186 }
1187 break;
1188 default:
1189 log_end = true;
1190 break;
1191 }
1192 jbd_block_set(jbd_fs, block: &block);
1193 this_block++;
1194 wrap(sb, this_block);
1195 if (this_block == start_block)
1196 log_end = true;
1197
1198 }
1199 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1200 if (r == EOK && action == ACTION_SCAN) {
1201 /* We have finished scanning the journal. */
1202 info->start_trans_id = start_trans_id;
1203 if (trans_id_diff(x: this_trans_id, y: start_trans_id) > 0)
1204 info->last_trans_id = this_trans_id - 1;
1205 else
1206 info->last_trans_id = this_trans_id;
1207 }
1208
1209 return r;
1210}
1211
1212/**@brief Replay journal.
1213 * @param jbd_fs jbd filesystem
1214 * @return standard error code*/
1215int jbd_recover(struct jbd_fs *jbd_fs)
1216{
1217 int r;
1218 struct recover_info info;
1219 struct jbd_sb *sb = &jbd_fs->sb;
1220 if (!sb->start)
1221 return EOK;
1222
1223 RB_INIT(&info.revoke_root);
1224
1225 r = jbd_iterate_log(jbd_fs, info: &info, ACTION_SCAN);
1226 if (r != EOK)
1227 return r;
1228
1229 r = jbd_iterate_log(jbd_fs, info: &info, ACTION_REVOKE);
1230 if (r != EOK)
1231 return r;
1232
1233 r = jbd_iterate_log(jbd_fs, info: &info, ACTION_RECOVER);
1234 if (r == EOK) {
1235 /* If we successfully replay the journal,
1236 * clear EXT4_FINCOM_RECOVER flag on the
1237 * ext4 superblock, and set the start of
1238 * journal to 0.*/
1239 uint32_t features_incompatible =
1240 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1241 features_incompatible);
1242 jbd_set32(&jbd_fs->sb, start, 0);
1243 jbd_set32(&jbd_fs->sb, sequence, info.last_trans_id);
1244 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1245 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1246 features_incompatible,
1247 features_incompatible);
1248 jbd_fs->dirty = true;
1249 r = ext4_sb_write(bdev: jbd_fs->bdev,
1250 s: &jbd_fs->inode_ref.fs->sb);
1251 }
1252 jbd_destroy_revoke_tree(info: &info);
1253 return r;
1254}
1255
1256static void jbd_journal_write_sb(struct jbd_journal *journal)
1257{
1258 struct jbd_fs *jbd_fs = journal->jbd_fs;
1259 jbd_set32(&jbd_fs->sb, start, journal->start);
1260 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1261 jbd_fs->dirty = true;
1262}
1263
1264/**@brief Start accessing the journal.
1265 * @param jbd_fs jbd filesystem
1266 * @param journal current journal session
1267 * @return standard error code*/
1268int jbd_journal_start(struct jbd_fs *jbd_fs,
1269 struct jbd_journal *journal)
1270{
1271 int r;
1272 uint32_t features_incompatible =
1273 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1274 features_incompatible);
1275 features_incompatible |= EXT4_FINCOM_RECOVER;
1276 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1277 features_incompatible,
1278 features_incompatible);
1279 r = ext4_sb_write(bdev: jbd_fs->bdev,
1280 s: &jbd_fs->inode_ref.fs->sb);
1281 if (r != EOK)
1282 return r;
1283
1284 journal->first = jbd_get32(&jbd_fs->sb, first);
1285 journal->start = journal->first;
1286 journal->last = journal->first;
1287 /*
1288 * To invalidate any stale records we need to start from
1289 * the checkpoint transaction ID of the previous journalling session
1290 * plus 1.
1291 */
1292 journal->trans_id = jbd_get32(&jbd_fs->sb, sequence) + 1;
1293 journal->alloc_trans_id = journal->trans_id;
1294
1295 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1296
1297 TAILQ_INIT(&journal->cp_queue);
1298 RB_INIT(&journal->block_rec_root);
1299 journal->jbd_fs = jbd_fs;
1300 jbd_journal_write_sb(journal);
1301 r = jbd_write_sb(jbd_fs);
1302 if (r != EOK)
1303 return r;
1304
1305 jbd_fs->bdev->journal = journal;
1306 return EOK;
1307}
1308
/* Forward declaration: the write-completion callback is defined further
 * down in this file but is already referenced by the routines below. */
static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
				struct ext4_buf *buf __unused,
				int res,
				void *arg);
1313
1314/*
1315 * This routine is only suitable to committed transactions. */
1316static void jbd_journal_flush_trans(struct jbd_trans *trans)
1317{
1318 struct jbd_buf *jbd_buf, *tmp;
1319 struct jbd_journal *journal = trans->journal;
1320 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1321 void *tmp_data = ext4_malloc(size: journal->block_size);
1322 ext4_assert(tmp_data);
1323
1324 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1325 tmp) {
1326 struct ext4_buf *buf;
1327 struct ext4_block block;
1328 /* The buffer is not yet flushed. */
1329 buf = ext4_bcache_find_get(bc: fs->bdev->bc, b: &block,
1330 lba: jbd_buf->block_rec->lba);
1331 if (!(buf && ext4_bcache_test_flag(buf, BC_UPTODATE) &&
1332 jbd_buf->block_rec->trans == trans)) {
1333 int r;
1334 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1335 r = jbd_block_get(jbd_fs: journal->jbd_fs,
1336 block: &jbd_block,
1337 fblock: jbd_buf->jbd_lba);
1338 ext4_assert(r == EOK);
1339 memcpy(dest: tmp_data, src: jbd_block.data,
1340 size: journal->block_size);
1341 ext4_block_set(bdev: fs->bdev, b: &jbd_block);
1342 r = ext4_blocks_set_direct(bdev: fs->bdev, buf: tmp_data,
1343 lba: jbd_buf->block_rec->lba, cnt: 1);
1344 jbd_trans_end_write(bc: fs->bdev->bc, buf, res: r, arg: jbd_buf);
1345 } else
1346 ext4_block_flush_buf(bdev: fs->bdev, buf);
1347
1348 if (buf)
1349 ext4_block_set(bdev: fs->bdev, b: &block);
1350 }
1351
1352 ext4_free(pointer: tmp_data);
1353}
1354
1355static void
1356jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1357 struct jbd_trans *trans)
1358{
1359 journal->start = trans->start_iblock +
1360 trans->alloc_blocks;
1361 wrap(&journal->jbd_fs->sb, journal->start);
1362 journal->trans_id = trans->trans_id + 1;
1363 jbd_journal_free_trans(journal,
1364 trans, abort: false);
1365 jbd_journal_write_sb(journal);
1366}
1367
1368void
1369jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1370 bool flush,
1371 bool once)
1372{
1373 struct jbd_trans *trans;
1374 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1375 if (!trans->data_cnt) {
1376 TAILQ_REMOVE(&journal->cp_queue,
1377 trans,
1378 trans_node);
1379 jbd_journal_skip_pure_revoke(journal, trans);
1380 } else {
1381 if (trans->data_cnt ==
1382 trans->written_cnt) {
1383 journal->start =
1384 trans->start_iblock +
1385 trans->alloc_blocks;
1386 wrap(&journal->jbd_fs->sb,
1387 journal->start);
1388 journal->trans_id =
1389 trans->trans_id + 1;
1390 TAILQ_REMOVE(&journal->cp_queue,
1391 trans,
1392 trans_node);
1393 jbd_journal_free_trans(journal,
1394 trans,
1395 abort: false);
1396 jbd_journal_write_sb(journal);
1397 } else if (!flush) {
1398 journal->start =
1399 trans->start_iblock;
1400 wrap(&journal->jbd_fs->sb,
1401 journal->start);
1402 journal->trans_id =
1403 trans->trans_id;
1404 jbd_journal_write_sb(journal);
1405 break;
1406 } else
1407 jbd_journal_flush_trans(trans);
1408 }
1409 if (once)
1410 break;
1411 }
1412}
1413
1414/**@brief Stop accessing the journal.
1415 * @param journal current journal session
1416 * @return standard error code*/
1417int jbd_journal_stop(struct jbd_journal *journal)
1418{
1419 int r;
1420 struct jbd_fs *jbd_fs = journal->jbd_fs;
1421 uint32_t features_incompatible;
1422
1423 /* Make sure that journalled content have reached
1424 * the disk.*/
1425 jbd_journal_purge_cp_trans(journal, flush: true, once: false);
1426
1427 /* There should be no block record in this journal
1428 * session. */
1429 if (!RB_EMPTY(&journal->block_rec_root))
1430 ext4_dbg(DEBUG_JBD,
1431 DBG_WARN "There are still block records "
1432 "in this journal session!\n");
1433
1434 features_incompatible =
1435 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1436 features_incompatible);
1437 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1438 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1439 features_incompatible,
1440 features_incompatible);
1441 r = ext4_sb_write(bdev: jbd_fs->bdev,
1442 s: &jbd_fs->inode_ref.fs->sb);
1443 if (r != EOK)
1444 return r;
1445
1446 journal->start = 0;
1447 journal->trans_id = 0;
1448 jbd_journal_write_sb(journal);
1449 return jbd_write_sb(jbd_fs: journal->jbd_fs);
1450}
1451
1452/**@brief Allocate a block in the journal.
1453 * @param journal current journal session
1454 * @param trans transaction
1455 * @return allocated block address*/
1456static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1457 struct jbd_trans *trans)
1458{
1459 uint32_t start_block;
1460
1461 start_block = journal->last++;
1462 trans->alloc_blocks++;
1463 wrap(&journal->jbd_fs->sb, journal->last);
1464
1465 /* If there is no space left, flush just one journalled
1466 * transaction.*/
1467 if (journal->last == journal->start) {
1468 jbd_journal_purge_cp_trans(journal, flush: true, once: true);
1469 ext4_assert(journal->last != journal->start);
1470 }
1471
1472 return start_block;
1473}
1474
1475static struct jbd_block_rec *
1476jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1477 ext4_fsblk_t lba)
1478{
1479 struct jbd_block_rec tmp = {
1480 .lba = lba
1481 };
1482
1483 return RB_FIND(jbd_block,
1484 &journal->block_rec_root,
1485 &tmp);
1486}
1487
1488static void
1489jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1490 struct jbd_trans *new_trans)
1491{
1492 LIST_REMOVE(block_rec, tbrec_node);
1493 if (new_trans) {
1494 /* Now this block record belongs to this transaction. */
1495 LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1496 }
1497 block_rec->trans = new_trans;
1498}
1499
1500static inline struct jbd_block_rec *
1501jbd_trans_insert_block_rec(struct jbd_trans *trans,
1502 ext4_fsblk_t lba)
1503{
1504 struct jbd_block_rec *block_rec;
1505 block_rec = jbd_trans_block_rec_lookup(journal: trans->journal, lba);
1506 if (block_rec) {
1507 jbd_trans_change_ownership(block_rec, new_trans: trans);
1508 return block_rec;
1509 }
1510 block_rec = ext4_calloc(count: 1, size: sizeof(struct jbd_block_rec));
1511 if (!block_rec)
1512 return NULL;
1513
1514 block_rec->lba = lba;
1515 block_rec->trans = trans;
1516 TAILQ_INIT(&block_rec->dirty_buf_queue);
1517 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1518 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1519 return block_rec;
1520}
1521
1522/*
1523 * This routine will do the dirty works.
1524 */
1525static void
1526jbd_trans_finish_callback(struct jbd_journal *journal,
1527 const struct jbd_trans *trans,
1528 struct jbd_block_rec *block_rec,
1529 bool abort,
1530 bool revoke)
1531{
1532 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1533 if (block_rec->trans != trans)
1534 return;
1535
1536 if (!abort) {
1537 struct jbd_buf *jbd_buf, *tmp;
1538 TAILQ_FOREACH_SAFE(jbd_buf,
1539 &block_rec->dirty_buf_queue,
1540 dirty_buf_node,
1541 tmp) {
1542 jbd_trans_end_write(bc: fs->bdev->bc,
1543 NULL,
1544 EOK,
1545 arg: jbd_buf);
1546 }
1547 } else {
1548 /*
1549 * We have to roll back data if the block is going to be
1550 * aborted.
1551 */
1552 struct jbd_buf *jbd_buf;
1553 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1554 block = EXT4_BLOCK_ZERO();
1555 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1556 jbd_buf_dirty);
1557 if (jbd_buf) {
1558 if (!revoke) {
1559 int r;
1560 r = ext4_block_get_noread(bdev: fs->bdev,
1561 b: &block,
1562 lba: block_rec->lba);
1563 ext4_assert(r == EOK);
1564 r = jbd_block_get(jbd_fs: journal->jbd_fs,
1565 block: &jbd_block,
1566 fblock: jbd_buf->jbd_lba);
1567 ext4_assert(r == EOK);
1568 memcpy(dest: block.data, src: jbd_block.data,
1569 size: journal->block_size);
1570
1571 jbd_trans_change_ownership(block_rec,
1572 new_trans: jbd_buf->trans);
1573
1574 block.buf->end_write = jbd_trans_end_write;
1575 block.buf->end_write_arg = jbd_buf;
1576
1577 ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1578 ext4_bcache_set_dirty(buf: block.buf);
1579
1580 ext4_block_set(bdev: fs->bdev, b: &jbd_block);
1581 ext4_block_set(bdev: fs->bdev, b: &block);
1582 return;
1583 } else {
1584 /* The revoked buffer is yet written. */
1585 jbd_trans_change_ownership(block_rec,
1586 new_trans: jbd_buf->trans);
1587 }
1588 }
1589 }
1590}
1591
1592static inline void
1593jbd_trans_remove_block_rec(struct jbd_journal *journal,
1594 struct jbd_block_rec *block_rec,
1595 struct jbd_trans *trans)
1596{
1597 /* If this block record doesn't belong to this transaction,
1598 * give up.*/
1599 if (block_rec->trans == trans) {
1600 LIST_REMOVE(block_rec, tbrec_node);
1601 RB_REMOVE(jbd_block,
1602 &journal->block_rec_root,
1603 block_rec);
1604 ext4_free(pointer: block_rec);
1605 }
1606}
1607
1608/**@brief Add block to a transaction and mark it dirty.
1609 * @param trans transaction
1610 * @param block block descriptor
1611 * @return standard error code*/
1612int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1613 struct ext4_block *block)
1614{
1615 struct jbd_buf *jbd_buf;
1616 struct jbd_revoke_rec *rec, tmp_rec = {
1617 .lba = block->lb_id
1618 };
1619 struct jbd_block_rec *block_rec;
1620
1621 if (block->buf->end_write == jbd_trans_end_write) {
1622 jbd_buf = block->buf->end_write_arg;
1623 if (jbd_buf && jbd_buf->trans == trans)
1624 return EOK;
1625 }
1626 jbd_buf = ext4_calloc(count: 1, size: sizeof(struct jbd_buf));
1627 if (!jbd_buf)
1628 return ENOMEM;
1629
1630 if ((block_rec = jbd_trans_insert_block_rec(trans,
1631 lba: block->lb_id)) == NULL) {
1632 ext4_free(pointer: jbd_buf);
1633 return ENOMEM;
1634 }
1635
1636 TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1637 jbd_buf,
1638 dirty_buf_node);
1639
1640 jbd_buf->block_rec = block_rec;
1641 jbd_buf->trans = trans;
1642 jbd_buf->block = *block;
1643 ext4_bcache_inc_ref(block->buf);
1644
1645 /* If the content reach the disk, notify us
1646 * so that we may do a checkpoint. */
1647 block->buf->end_write = jbd_trans_end_write;
1648 block->buf->end_write_arg = jbd_buf;
1649
1650 trans->data_cnt++;
1651 TAILQ_INSERT_HEAD(&trans->buf_queue, jbd_buf, buf_node);
1652
1653 ext4_bcache_set_dirty(buf: block->buf);
1654 rec = RB_FIND(jbd_revoke_tree,
1655 &trans->revoke_root,
1656 &tmp_rec);
1657 if (rec) {
1658 RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
1659 rec);
1660 ext4_free(pointer: rec);
1661 }
1662
1663 return EOK;
1664}
1665
1666/**@brief Add block to be revoked to a transaction
1667 * @param trans transaction
1668 * @param lba logical block address
1669 * @return standard error code*/
1670int jbd_trans_revoke_block(struct jbd_trans *trans,
1671 ext4_fsblk_t lba)
1672{
1673 struct jbd_revoke_rec tmp_rec = {
1674 .lba = lba
1675 }, *rec;
1676 rec = RB_FIND(jbd_revoke_tree,
1677 &trans->revoke_root,
1678 &tmp_rec);
1679 if (rec)
1680 return EOK;
1681
1682 rec = ext4_calloc(count: 1, size: sizeof(struct jbd_revoke_rec));
1683 if (!rec)
1684 return ENOMEM;
1685
1686 rec->lba = lba;
1687 RB_INSERT(jbd_revoke_tree, &trans->revoke_root, rec);
1688 return EOK;
1689}
1690
1691/**@brief Try to add block to be revoked to a transaction.
1692 * If @lba still remains in an transaction on checkpoint
1693 * queue, add @lba as a revoked block to the transaction.
1694 * @param trans transaction
1695 * @param lba logical block address
1696 * @return standard error code*/
1697int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1698 ext4_fsblk_t lba)
1699{
1700 struct jbd_journal *journal = trans->journal;
1701 struct jbd_block_rec *block_rec =
1702 jbd_trans_block_rec_lookup(journal, lba);
1703
1704 if (block_rec) {
1705 if (block_rec->trans == trans) {
1706 struct jbd_buf *jbd_buf =
1707 TAILQ_LAST(&block_rec->dirty_buf_queue,
1708 jbd_buf_dirty);
1709 /* If there are still unwritten buffers. */
1710 if (TAILQ_FIRST(&block_rec->dirty_buf_queue) !=
1711 jbd_buf)
1712 jbd_trans_revoke_block(trans, lba);
1713
1714 } else
1715 jbd_trans_revoke_block(trans, lba);
1716 }
1717
1718 return EOK;
1719}
1720
1721/**@brief Free a transaction
1722 * @param journal current journal session
1723 * @param trans transaction
1724 * @param abort discard all the modifications on the block?*/
1725void jbd_journal_free_trans(struct jbd_journal *journal,
1726 struct jbd_trans *trans,
1727 bool abort)
1728{
1729 struct jbd_buf *jbd_buf, *tmp;
1730 struct jbd_revoke_rec *rec, *tmp2;
1731 struct jbd_block_rec *block_rec, *tmp3;
1732 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1733 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1734 tmp) {
1735 block_rec = jbd_buf->block_rec;
1736 if (abort) {
1737 jbd_buf->block.buf->end_write = NULL;
1738 jbd_buf->block.buf->end_write_arg = NULL;
1739 ext4_bcache_clear_dirty(buf: jbd_buf->block.buf);
1740 ext4_block_set(bdev: fs->bdev, b: &jbd_buf->block);
1741 }
1742
1743 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1744 jbd_buf,
1745 dirty_buf_node);
1746 jbd_trans_finish_callback(journal,
1747 trans,
1748 block_rec,
1749 abort,
1750 revoke: false);
1751 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1752 ext4_free(pointer: jbd_buf);
1753 }
1754 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
1755 tmp2) {
1756 RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
1757 ext4_free(pointer: rec);
1758 }
1759 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1760 tmp3) {
1761 jbd_trans_remove_block_rec(journal, block_rec, trans);
1762 }
1763
1764 ext4_free(pointer: trans);
1765}
1766
1767/**@brief Write commit block for a transaction
1768 * @param trans transaction
1769 * @return standard error code*/
1770static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1771{
1772 int rc;
1773 struct ext4_block block;
1774 struct jbd_commit_header *header;
1775 uint32_t commit_iblock;
1776 struct jbd_journal *journal = trans->journal;
1777
1778 commit_iblock = jbd_journal_alloc_block(journal, trans);
1779
1780 rc = jbd_block_get_noread(jbd_fs: journal->jbd_fs, block: &block, fblock: commit_iblock);
1781 if (rc != EOK)
1782 return rc;
1783
1784 header = (struct jbd_commit_header *)block.data;
1785 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1786 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1787 jbd_set32(&header->header, sequence, trans->trans_id);
1788
1789 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1790 JBD_FEATURE_COMPAT_CHECKSUM)) {
1791 header->chksum_type = JBD_CRC32_CHKSUM;
1792 header->chksum_size = JBD_CRC32_CHKSUM_SIZE;
1793 jbd_set32(header, chksum[0], trans->data_csum);
1794 }
1795 jbd_commit_csum_set(jbd_fs: journal->jbd_fs, header);
1796 ext4_bcache_set_dirty(buf: block.buf);
1797 ext4_bcache_set_flag(block.buf, BC_TMP);
1798 rc = jbd_block_set(jbd_fs: journal->jbd_fs, block: &block);
1799 return rc;
1800}
1801
1802/**@brief Write descriptor block for a transaction
1803 * @param journal current journal session
1804 * @param trans transaction
1805 * @return standard error code*/
1806static int jbd_journal_prepare(struct jbd_journal *journal,
1807 struct jbd_trans *trans)
1808{
1809 int rc = EOK, i = 0;
1810 struct ext4_block desc_block = EXT4_BLOCK_ZERO(),
1811 data_block = EXT4_BLOCK_ZERO();
1812 int32_t tag_tbl_size = 0;
1813 uint32_t desc_iblock = 0;
1814 uint32_t data_iblock = 0;
1815 char *tag_start = NULL, *tag_ptr = NULL;
1816 struct jbd_buf *jbd_buf, *tmp;
1817 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1818 uint32_t checksum = EXT4_CRC32_INIT;
1819 struct jbd_bhdr *bhdr = NULL;
1820 void *data;
1821
1822 /* Try to remove any non-dirty buffers from the tail of
1823 * buf_queue. */
1824 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1825 jbd_trans_buf, buf_node, tmp) {
1826 struct jbd_revoke_rec tmp_rec = {
1827 .lba = jbd_buf->block_rec->lba
1828 };
1829 /* We stop the iteration when we find a dirty buffer. */
1830 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1831 BC_DIRTY))
1832 break;
1833
1834 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1835 jbd_buf,
1836 dirty_buf_node);
1837
1838 jbd_buf->block.buf->end_write = NULL;
1839 jbd_buf->block.buf->end_write_arg = NULL;
1840 jbd_trans_finish_callback(journal,
1841 trans,
1842 block_rec: jbd_buf->block_rec,
1843 abort: true,
1844 RB_FIND(jbd_revoke_tree,
1845 &trans->revoke_root,
1846 &tmp_rec));
1847 jbd_trans_remove_block_rec(journal,
1848 block_rec: jbd_buf->block_rec, trans);
1849 trans->data_cnt--;
1850
1851 ext4_block_set(bdev: fs->bdev, b: &jbd_buf->block);
1852 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1853 ext4_free(pointer: jbd_buf);
1854 }
1855
1856 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1857 struct tag_info tag_info;
1858 bool uuid_exist = false;
1859 bool is_escape = false;
1860 struct jbd_revoke_rec tmp_rec = {
1861 .lba = jbd_buf->block_rec->lba
1862 };
1863 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1864 BC_DIRTY)) {
1865 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1866 jbd_buf,
1867 dirty_buf_node);
1868
1869 jbd_buf->block.buf->end_write = NULL;
1870 jbd_buf->block.buf->end_write_arg = NULL;
1871
1872 /* The buffer has not been modified, just release
1873 * that jbd_buf. */
1874 jbd_trans_finish_callback(journal,
1875 trans,
1876 block_rec: jbd_buf->block_rec,
1877 abort: true,
1878 RB_FIND(jbd_revoke_tree,
1879 &trans->revoke_root,
1880 &tmp_rec));
1881 jbd_trans_remove_block_rec(journal,
1882 block_rec: jbd_buf->block_rec, trans);
1883 trans->data_cnt--;
1884
1885 ext4_block_set(bdev: fs->bdev, b: &jbd_buf->block);
1886 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1887 ext4_free(pointer: jbd_buf);
1888 continue;
1889 }
1890 checksum = jbd_block_csum(jbd_fs: journal->jbd_fs,
1891 buf: jbd_buf->block.data,
1892 csum: checksum,
1893 sequence: trans->trans_id);
1894 if (((struct jbd_bhdr *)jbd_buf->block.data)->magic ==
1895 to_be32(JBD_MAGIC_NUMBER))
1896 is_escape = true;
1897
1898again:
1899 if (!desc_iblock) {
1900 desc_iblock = jbd_journal_alloc_block(journal, trans);
1901 rc = jbd_block_get_noread(jbd_fs: journal->jbd_fs, block: &desc_block, fblock: desc_iblock);
1902 if (rc != EOK)
1903 break;
1904
1905 bhdr = (struct jbd_bhdr *)desc_block.data;
1906 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1907 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1908 jbd_set32(bhdr, sequence, trans->trans_id);
1909
1910 tag_start = (char *)(bhdr + 1);
1911 tag_ptr = tag_start;
1912 uuid_exist = true;
1913 tag_tbl_size = journal->block_size -
1914 sizeof(struct jbd_bhdr);
1915
1916 if (jbd_has_csum(jbd_sb: &journal->jbd_fs->sb))
1917 tag_tbl_size -= sizeof(struct jbd_block_tail);
1918
1919 if (!trans->start_iblock)
1920 trans->start_iblock = desc_iblock;
1921
1922 ext4_bcache_set_dirty(buf: desc_block.buf);
1923 ext4_bcache_set_flag(desc_block.buf, BC_TMP);
1924 }
1925 tag_info.block = jbd_buf->block.lb_id;
1926 tag_info.uuid_exist = uuid_exist;
1927 tag_info.is_escape = is_escape;
1928 if (i == trans->data_cnt - 1)
1929 tag_info.last_tag = true;
1930 else
1931 tag_info.last_tag = false;
1932
1933 tag_info.checksum = checksum;
1934
1935 if (uuid_exist)
1936 memcpy(dest: tag_info.uuid, src: journal->jbd_fs->sb.uuid,
1937 UUID_SIZE);
1938
1939 rc = jbd_write_block_tag(jbd_fs: journal->jbd_fs,
1940 tag: tag_ptr,
1941 remain_buf_size: tag_tbl_size,
1942 tag_info: &tag_info);
1943 if (rc != EOK) {
1944 jbd_meta_csum_set(jbd_fs: journal->jbd_fs, bhdr);
1945 desc_iblock = 0;
1946 rc = jbd_block_set(jbd_fs: journal->jbd_fs, block: &desc_block);
1947 if (rc != EOK)
1948 break;
1949
1950 goto again;
1951 }
1952
1953 data_iblock = jbd_journal_alloc_block(journal, trans);
1954 rc = jbd_block_get_noread(jbd_fs: journal->jbd_fs, block: &data_block, fblock: data_iblock);
1955 if (rc != EOK) {
1956 desc_iblock = 0;
1957 ext4_bcache_clear_dirty(buf: desc_block.buf);
1958 jbd_block_set(jbd_fs: journal->jbd_fs, block: &desc_block);
1959 break;
1960 }
1961
1962 data = data_block.data;
1963 memcpy(dest: data, src: jbd_buf->block.data,
1964 size: journal->block_size);
1965 if (is_escape)
1966 ((struct jbd_bhdr *)data)->magic = 0;
1967
1968 ext4_bcache_set_dirty(buf: data_block.buf);
1969 ext4_bcache_set_flag(data_block.buf, BC_TMP);
1970 rc = jbd_block_set(jbd_fs: journal->jbd_fs, block: &data_block);
1971 if (rc != EOK) {
1972 desc_iblock = 0;
1973 ext4_bcache_clear_dirty(buf: desc_block.buf);
1974 jbd_block_set(jbd_fs: journal->jbd_fs, block: &desc_block);
1975 break;
1976 }
1977 jbd_buf->jbd_lba = data_iblock;
1978
1979 tag_ptr += tag_info.tag_bytes;
1980 tag_tbl_size -= tag_info.tag_bytes;
1981
1982 i++;
1983 }
1984 if (rc == EOK && desc_iblock) {
1985 jbd_meta_csum_set(jbd_fs: journal->jbd_fs,
1986 bhdr: (struct jbd_bhdr *)bhdr);
1987 trans->data_csum = checksum;
1988 rc = jbd_block_set(jbd_fs: journal->jbd_fs, block: &desc_block);
1989 }
1990
1991 return rc;
1992}
1993
1994/**@brief Write revoke block for a transaction
1995 * @param journal current journal session
1996 * @param trans transaction
1997 * @return standard error code*/
1998static int
1999jbd_journal_prepare_revoke(struct jbd_journal *journal,
2000 struct jbd_trans *trans)
2001{
2002 int rc = EOK, i = 0;
2003 struct ext4_block desc_block = EXT4_BLOCK_ZERO();
2004 int32_t tag_tbl_size = 0;
2005 uint32_t desc_iblock = 0;
2006 char *blocks_entry = NULL;
2007 struct jbd_revoke_rec *rec, *tmp;
2008 struct jbd_revoke_header *header = NULL;
2009 int32_t record_len = 4;
2010 struct jbd_bhdr *bhdr = NULL;
2011
2012 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
2013 JBD_FEATURE_INCOMPAT_64BIT))
2014 record_len = 8;
2015
2016 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
2017 tmp) {
2018again:
2019 if (!desc_iblock) {
2020 desc_iblock = jbd_journal_alloc_block(journal, trans);
2021 rc = jbd_block_get_noread(jbd_fs: journal->jbd_fs, block: &desc_block,
2022 fblock: desc_iblock);
2023 if (rc != EOK)
2024 break;
2025
2026 bhdr = (struct jbd_bhdr *)desc_block.data;
2027 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
2028 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
2029 jbd_set32(bhdr, sequence, trans->trans_id);
2030
2031 header = (struct jbd_revoke_header *)bhdr;
2032 blocks_entry = (char *)(header + 1);
2033 tag_tbl_size = journal->block_size -
2034 sizeof(struct jbd_revoke_header);
2035
2036 if (jbd_has_csum(jbd_sb: &journal->jbd_fs->sb))
2037 tag_tbl_size -= sizeof(struct jbd_block_tail);
2038
2039 if (!trans->start_iblock)
2040 trans->start_iblock = desc_iblock;
2041
2042 ext4_bcache_set_dirty(buf: desc_block.buf);
2043 ext4_bcache_set_flag(desc_block.buf, BC_TMP);
2044 }
2045
2046 if (tag_tbl_size < record_len) {
2047 jbd_set32(header, count,
2048 journal->block_size - tag_tbl_size);
2049 jbd_meta_csum_set(jbd_fs: journal->jbd_fs, bhdr);
2050 bhdr = NULL;
2051 desc_iblock = 0;
2052 header = NULL;
2053 rc = jbd_block_set(jbd_fs: journal->jbd_fs, block: &desc_block);
2054 if (rc != EOK)
2055 break;
2056
2057 goto again;
2058 }
2059 if (record_len == 8) {
2060 uint64_t *blocks =
2061 (uint64_t *)blocks_entry;
2062 *blocks = to_be64(rec->lba);
2063 } else {
2064 uint32_t *blocks =
2065 (uint32_t *)blocks_entry;
2066 *blocks = to_be32((uint32_t)rec->lba);
2067 }
2068 blocks_entry += record_len;
2069 tag_tbl_size -= record_len;
2070
2071 i++;
2072 }
2073 if (rc == EOK && desc_iblock) {
2074 if (header != NULL)
2075 jbd_set32(header, count,
2076 journal->block_size - tag_tbl_size);
2077
2078 jbd_meta_csum_set(jbd_fs: journal->jbd_fs, bhdr);
2079 rc = jbd_block_set(jbd_fs: journal->jbd_fs, block: &desc_block);
2080 }
2081
2082 return rc;
2083}
2084
2085/**@brief Put references of block descriptors in a transaction.
2086 * @param journal current journal session
2087 * @param trans transaction*/
2088void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2089{
2090 struct jbd_buf *jbd_buf, *tmp;
2091 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2092 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2093 tmp) {
2094 struct ext4_block block = jbd_buf->block;
2095 ext4_block_set(bdev: fs->bdev, b: &block);
2096 }
2097}
2098
2099/**@brief Update the start block of the journal when
2100 * all the contents in a transaction reach the disk.*/
2101static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2102 struct ext4_buf *buf,
2103 int res,
2104 void *arg)
2105{
2106 struct jbd_buf *jbd_buf = arg;
2107 struct jbd_trans *trans = jbd_buf->trans;
2108 struct jbd_block_rec *block_rec = jbd_buf->block_rec;
2109 struct jbd_journal *journal = trans->journal;
2110 bool first_in_queue =
2111 trans == TAILQ_FIRST(&journal->cp_queue);
2112 if (res != EOK)
2113 trans->error = res;
2114
2115 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2116 TAILQ_REMOVE(&block_rec->dirty_buf_queue,
2117 jbd_buf,
2118 dirty_buf_node);
2119
2120 jbd_trans_finish_callback(journal,
2121 trans,
2122 block_rec: jbd_buf->block_rec,
2123 abort: false,
2124 revoke: false);
2125 if (block_rec->trans == trans && buf) {
2126 /* Clear the end_write and end_write_arg fields. */
2127 buf->end_write = NULL;
2128 buf->end_write_arg = NULL;
2129 }
2130
2131 ext4_free(pointer: jbd_buf);
2132
2133 trans->written_cnt++;
2134 if (trans->written_cnt == trans->data_cnt) {
2135 /* If it is the first transaction on checkpoint queue,
2136 * we will shift the start of the journal to the next
2137 * transaction, and remove subsequent written
2138 * transactions from checkpoint queue until we find
2139 * an unwritten one. */
2140 if (first_in_queue) {
2141 journal->start = trans->start_iblock +
2142 trans->alloc_blocks;
2143 wrap(&journal->jbd_fs->sb, journal->start);
2144 journal->trans_id = trans->trans_id + 1;
2145 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2146 jbd_journal_free_trans(journal, trans, abort: false);
2147
2148 jbd_journal_purge_cp_trans(journal, flush: false, once: false);
2149 jbd_journal_write_sb(journal);
2150 jbd_write_sb(jbd_fs: journal->jbd_fs);
2151 }
2152 }
2153}
2154
2155/**@brief Commit a transaction to the journal immediately.
2156 * @param journal current journal session
2157 * @param trans transaction
2158 * @return standard error code*/
2159static int __jbd_journal_commit_trans(struct jbd_journal *journal,
2160 struct jbd_trans *trans)
2161{
2162 int rc = EOK;
2163 uint32_t last = journal->last;
2164 struct jbd_revoke_rec *rec, *tmp;
2165
2166 trans->trans_id = journal->alloc_trans_id;
2167 rc = jbd_journal_prepare(journal, trans);
2168 if (rc != EOK)
2169 goto Finish;
2170
2171 rc = jbd_journal_prepare_revoke(journal, trans);
2172 if (rc != EOK)
2173 goto Finish;
2174
2175 if (TAILQ_EMPTY(&trans->buf_queue) &&
2176 RB_EMPTY(&trans->revoke_root)) {
2177 /* Since there are no entries in both buffer list
2178 * and revoke entry list, we do not consider trans as
2179 * complete transaction and just return EOK.*/
2180 jbd_journal_free_trans(journal, trans, abort: false);
2181 goto Finish;
2182 }
2183
2184 rc = jbd_trans_write_commit_block(trans);
2185 if (rc != EOK)
2186 goto Finish;
2187
2188 journal->alloc_trans_id++;
2189
2190 /* Complete the checkpoint of buffers which are revoked. */
2191 RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
2192 tmp) {
2193 struct jbd_block_rec *block_rec =
2194 jbd_trans_block_rec_lookup(journal, lba: rec->lba);
2195 struct jbd_buf *jbd_buf = NULL;
2196 if (block_rec)
2197 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
2198 jbd_buf_dirty);
2199 if (jbd_buf) {
2200 struct ext4_buf *buf;
2201 struct ext4_block block = EXT4_BLOCK_ZERO();
2202 /*
2203 * We do this to reset the ext4_buf::end_write and
2204 * ext4_buf::end_write_arg fields so that the checkpoint
2205 * callback won't be triggered again.
2206 */
2207 buf = ext4_bcache_find_get(bc: journal->jbd_fs->bdev->bc,
2208 b: &block,
2209 lba: jbd_buf->block_rec->lba);
2210 jbd_trans_end_write(bc: journal->jbd_fs->bdev->bc,
2211 buf,
2212 EOK,
2213 arg: jbd_buf);
2214 if (buf)
2215 ext4_block_set(bdev: journal->jbd_fs->bdev, b: &block);
2216 }
2217 }
2218
2219 if (TAILQ_EMPTY(&journal->cp_queue)) {
2220 /*
2221 * This transaction is going to be the first object in the
2222 * checkpoint queue.
2223 * When the first transaction in checkpoint queue is completely
2224 * written to disk, we shift the tail of the log to right.
2225 */
2226 if (trans->data_cnt) {
2227 journal->start = trans->start_iblock;
2228 wrap(&journal->jbd_fs->sb, journal->start);
2229 journal->trans_id = trans->trans_id;
2230 jbd_journal_write_sb(journal);
2231 jbd_write_sb(jbd_fs: journal->jbd_fs);
2232 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2233 trans_node);
2234 jbd_journal_cp_trans(journal, trans);
2235 } else {
2236 journal->start = trans->start_iblock +
2237 trans->alloc_blocks;
2238 wrap(&journal->jbd_fs->sb, journal->start);
2239 journal->trans_id = trans->trans_id + 1;
2240 jbd_journal_write_sb(journal);
2241 jbd_journal_free_trans(journal, trans, abort: false);
2242 }
2243 } else {
2244 /* No need to do anything to the JBD superblock. */
2245 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2246 trans_node);
2247 if (trans->data_cnt)
2248 jbd_journal_cp_trans(journal, trans);
2249 }
2250Finish:
2251 if (rc != EOK && rc != ENOSPC) {
2252 journal->last = last;
2253 jbd_journal_free_trans(journal, trans, abort: true);
2254 }
2255 return rc;
2256}
2257
2258/**@brief Allocate a new transaction
2259 * @param journal current journal session
2260 * @return transaction allocated*/
2261struct jbd_trans *
2262jbd_journal_new_trans(struct jbd_journal *journal)
2263{
2264 struct jbd_trans *trans = NULL;
2265 trans = ext4_calloc(count: 1, size: sizeof(struct jbd_trans));
2266 if (!trans)
2267 return NULL;
2268
2269 /* We will assign a trans_id to this transaction,
2270 * once it has been committed.*/
2271 trans->journal = journal;
2272 trans->data_csum = EXT4_CRC32_INIT;
2273 trans->error = EOK;
2274 TAILQ_INIT(&trans->buf_queue);
2275 return trans;
2276}
2277
/**@brief Commit a transaction to the journal immediately.
 *        Public entry point; forwards to __jbd_journal_commit_trans()
 *        and returns its result unchanged.
 * @param journal current journal session
 * @param trans transaction
 * @return standard error code*/
int jbd_journal_commit_trans(struct jbd_journal *journal,
			     struct jbd_trans *trans)
{
	return __jbd_journal_commit_trans(journal, trans);
}
2289
2290/**
2291 * @}
2292 */
2293