1 static inline unsigned shift128(uint64_t high, uint64_t low, unsigned shift)
   2 {
   3         /* shift is 7 bits, so mask2 is ~0 iff the high bit in shift is set */
   4         unsigned ret, mask = 0 - (shift >> 6);
   5         shift &= 63;
   6 
   7         ret  = low >> shift;
   8         ret |= (high << 1) << (63U - shift);
   9 
  10         ret &= ~mask;
  11         ret |=  mask & (high >> shift);
  12 
  13         return ret;
  14 }
  15 
  16 static inline unsigned bfloat_mantissa(const struct bkey *k,
  17                                        struct bkey_float *f)
  18 {
  19         return shift128(k->header, k->key, f->exponent) & BKEY_MANTISSA_MASK;
  20 }
  21 
  22 static struct bset_search_iter bset_search_tree(struct btree *b,
  23                                                 struct bset_tree *t,
  24                                                 const struct bkey *search)
  25 {
  26         struct bkey *l, *r;
  27         struct bkey_float *f;
  28         unsigned inorder, j, n = 1;
  29 
  30         do {
  31                 unsigned p = n << 4;
  32                 p &= ((int) (p - t->size)) >> 31;
  33 
  34                 prefetch(&t->tree[p]);
  35 
  36                 j = n;
  37                 f = &t->tree[j];
  38 
  39                 /*
  40                  * n = (f->mantissa > bfloat_mantissa())
  41                  *      ? j * 2
  42                  *      : j * 2 + 1;
  43                  */
  44                 if (likely(f->exponent != 127))
  45                         n = j * 2 + (((unsigned)
  46                                       (f->mantissa -
  47                                        bfloat_mantissa(search, f) - 1)) >> 31);
  48                 else
  49                         n = (bkey_cmp(tree_to_bkey(t, j), search) > 0)
  50                                 ? j * 2
  51                                 : j * 2 + 1;
  52         } while (n < t->size);
  53 
  54         inorder = to_inorder(j, t);
  55 
  56         /*
  57          * n would have been the node we recursed to - the low bit tells us if
  58          * we recursed left or recursed right.
  59          */
  60         if (n & 1) {
  61                 l = cacheline_to_bkey(t, inorder, f->m);
  62 
  63                 if (++inorder != t->size) {
  64                         f = &t->tree[inorder_next(j, t->size)];
  65                         r = cacheline_to_bkey(t, inorder, f->m);
  66                 } else
  67                         r = end(t->data);
  68         } else {
  69                 r = cacheline_to_bkey(t, inorder, f->m);
  70 
  71                 if (--inorder) {
  72                         f = &t->tree[inorder_prev(j, t->size)];
  73                         l = cacheline_to_bkey(t, inorder, f->m);
  74                 } else
  75                         l = t->data->start;
  76         }
  77 
  78         return (struct bset_search_iter) {l, r};
  79 }
  80 
  81 0000000000000000 <bset_search_tree.isra.6>:
  82    0:   55                      push   %rbp
  83    1:   48 89 e5                mov    %rsp,%rbp
  84    4:   41 57                   push   %r15
  85    6:   41 56                   push   %r14
  86    8:   41 55                   push   %r13
  87    a:   41 54                   push   %r12
  88    c:   41 bc 01 00 00 00       mov    $0x1,%r12d
  89   12:   53                      push   %rbx
  90   13:   48 83 ec 48             sub    $0x48,%rsp
  91   17:   48 89 7d 98             mov    %rdi,-0x68(%rbp)
  92   1b:   48 89 75 a0             mov    %rsi,-0x60(%rbp)
  93   1f:   44 8b 2f                mov    (%rdi),%r13d
  94   22:   4c 8b 77 18             mov    0x18(%rdi),%r14
  95   26:   eb 0b                   jmp    33 <bset_search_tree.isra.6+0x33>
  96   28:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
  97   2f:   00 
  98   30:   41 89 c4                mov    %eax,%r12d
  99   33:   44 89 e2                mov    %r12d,%edx
 100   36:   c1 e2 04                shl    $0x4,%edx
 101   39:   89 d0                   mov    %edx,%eax
 102   3b:   44 29 e8                sub    %r13d,%eax
 103   3e:   c1 f8 1f                sar    $0x1f,%eax
 104   41:   21 d0                   and    %edx,%eax
 105   43:   49 8d 04 86             lea    (%r14,%rax,4),%rax
 106   47:   0f 18 08                prefetcht0 (%rax)
 107   4a:   44 89 e0                mov    %r12d,%eax
 108   4d:   49 8d 1c 86             lea    (%r14,%rax,4),%rbx
 109   51:   44 0f b6 3b             movzbl (%rbx),%r15d
 110   55:   44 89 f8                mov    %r15d,%eax
 111   58:   83 e0 7f                and    $0x7f,%eax
 112   5b:   3c 7f                   cmp    $0x7f,%al
 113   5d:   0f 84 b2 01 00 00       je     215 <bset_search_tree.isra.6+0x215>
 114   63:   48 8b 7d a0             mov    -0x60(%rbp),%rdi
 115   67:   43 8d 14 24             lea    (%r12,%r12,1),%edx
 116   6b:   48 89 de                mov    %rbx,%rsi
 117   6e:   89 55 ac                mov    %edx,-0x54(%rbp)
 118   71:   e8 00 00 00 00          callq  76 <bset_search_tree.isra.6+0x76>
 119   76:   0f b6 53 01             movzbl 0x1(%rbx),%edx
 120   7a:   0f b6 4b 02             movzbl 0x2(%rbx),%ecx
 121   7e:   c0 ea 02                shr    $0x2,%dl
 122   81:   0f b6 d2                movzbl %dl,%edx
 123   84:   48 c1 e1 06             shl    $0x6,%rcx
 124   88:   48 09 d1                or     %rdx,%rcx
 125   8b:   0f b6 53 03             movzbl 0x3(%rbx),%edx
 126   8f:   48 c1 e2 0e             shl    $0xe,%rdx
 127   93:   48 09 ca                or     %rcx,%rdx
 128   96:   83 ea 01                sub    $0x1,%edx
 129   99:   29 c2                   sub    %eax,%edx
 130   9b:   8b 45 ac                mov    -0x54(%rbp),%eax
 131   9e:   c1 ea 1f                shr    $0x1f,%edx
 132   a1:   01 d0                   add    %edx,%eax
 133   a3:   44 39 e8                cmp    %r13d,%eax
 134   a6:   72 88                   jb     30 <bset_search_tree.isra.6+0x30>
 135   a8:   48 8b 55 98             mov    -0x68(%rbp),%rdx
 136   ac:   be ff ff ff ff          mov    $0xffffffff,%esi
 137   b1:   45 8d 44 35 00          lea    0x0(%r13,%rsi,1),%r8d
 138   b6:   41 0f bd cc             bsr    %r12d,%ecx
 139   ba:   0f 44 ce                cmove  %esi,%ecx
 140   bd:   8b 7a 04                mov    0x4(%rdx),%edi
 141   c0:   41 0f bd d0             bsr    %r8d,%edx
 142   c4:   0f 44 d6                cmove  %esi,%edx
 143   c7:   be 01 00 00 00          mov    $0x1,%esi
 144   cc:   29 ca                   sub    %ecx,%edx
 145   ce:   d3 e6                   shl    %cl,%esi
 146   d0:   89 d1                   mov    %edx,%ecx
 147   d2:   44 31 e6                xor    %r12d,%esi
 148   d5:   01 f6                   add    %esi,%esi
 149   d7:   83 ce 01                or     $0x1,%esi
 150   da:   d3 e6                   shl    %cl,%esi
 151   dc:   39 f7                   cmp    %esi,%edi
 152   de:   73 08                   jae    e8 <bset_search_tree.isra.6+0xe8>
 153   e0:   89 f2                   mov    %esi,%edx
 154   e2:   29 fa                   sub    %edi,%edx
 155   e4:   d1 ea                   shr    %edx
 156   e6:   29 d6                   sub    %edx,%esi
 157   e8:   a8 01                   test   $0x1,%al
 158   ea:   0f 84 88 00 00 00       je     178 <bset_search_tree.isra.6+0x178>
 159   f0:   48 8b 45 98             mov    -0x68(%rbp),%rax
 160   f4:   41 c0 ef 07             shr    $0x7,%r15b
 161   f8:   89 f1                   mov    %esi,%ecx
 162   fa:   45 0f b6 ff             movzbl %r15b,%r15d
 163   fe:   c1 e1 07                shl    $0x7,%ecx
 164  101:   48 8b 50 28             mov    0x28(%rax),%rdx
 165  105:   0f b6 43 01             movzbl 0x1(%rbx),%eax
 166  109:   83 e0 03                and    $0x3,%eax
 167  10c:   48 01 c0                add    %rax,%rax
 168  10f:   4c 09 f8                or     %r15,%rax
 169  112:   48 8d 1c c1             lea    (%rcx,%rax,8),%rbx
 170  116:   44 8d 7e 01             lea    0x1(%rsi),%r15d
 171  11a:   48 01 d3                add    %rdx,%rbx
 172  11d:   45 39 ef                cmp    %r13d,%r15d
 173  120:   0f 84 e2 00 00 00       je     208 <bset_search_tree.isra.6+0x208>
 174  126:   44 89 ee                mov    %r13d,%esi
 175  129:   44 89 e7                mov    %r12d,%edi
 176  12c:   48 89 55 90             mov    %rdx,-0x70(%rbp)
 177  130:   e8 00 00 00 00          callq  135 <bset_search_tree.isra.6+0x135>
 178  135:   48 8b 55 90             mov    -0x70(%rbp),%rdx
 179  139:   41 c1 e7 07             shl    $0x7,%r15d
 180  13d:   89 c0                   mov    %eax,%eax
 181  13f:   41 0f b6 0c 86          movzbl (%r14,%rax,4),%ecx
 182  144:   41 0f b6 44 86 01       movzbl 0x1(%r14,%rax,4),%eax
 183  14a:   c0 e9 07                shr    $0x7,%cl
 184  14d:   83 e0 03                and    $0x3,%eax
 185  150:   0f b6 c9                movzbl %cl,%ecx
 186  153:   48 01 c0                add    %rax,%rax
 187  156:   48 09 c8                or     %rcx,%rax
 188  159:   49 8d 04 c7             lea    (%r15,%rax,8),%rax
 189  15d:   48 01 c2                add    %rax,%rdx
 190  160:   48 83 c4 48             add    $0x48,%rsp
 191  164:   48 89 d8                mov    %rbx,%rax
 192  167:   5b                      pop    %rbx
 193  168:   41 5c                   pop    %r12
 194  16a:   41 5d                   pop    %r13
 195  16c:   41 5e                   pop    %r14
 196  16e:   41 5f                   pop    %r15
 197  170:   5d                      pop    %rbp
 198  171:   c3                      retq   
 199  172:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
 200  178:   48 8b 55 98             mov    -0x68(%rbp),%rdx
 201  17c:   41 c0 ef 07             shr    $0x7,%r15b
 202  180:   89 f1                   mov    %esi,%ecx
 203  182:   45 0f b6 ff             movzbl %r15b,%r15d
 204  186:   c1 e1 07                shl    $0x7,%ecx
 205  189:   48 8b 42 28             mov    0x28(%rdx),%rax
 206  18d:   0f b6 53 01             movzbl 0x1(%rbx),%edx
 207  191:   48 8d 58 20             lea    0x20(%rax),%rbx
 208  195:   83 e2 03                and    $0x3,%edx
 209  198:   48 01 d2                add    %rdx,%rdx
 210  19b:   4c 09 fa                or     %r15,%rdx
 211  19e:   48 8d 14 d1             lea    (%rcx,%rdx,8),%rdx
 212  1a2:   48 01 c2                add    %rax,%rdx
 213  1a5:   83 ee 01                sub    $0x1,%esi
 214  1a8:   74 b6                   je     160 <bset_search_tree.isra.6+0x160>
 215  1aa:   43 8d 3c 24             lea    (%r12,%r12,1),%edi
 216  1ae:   44 39 ef                cmp    %r13d,%edi
 217  1b1:   72 47                   jb     1fa <bset_search_tree.isra.6+0x1fa>
 218  1b3:   b9 ff ff ff ff          mov    $0xffffffff,%ecx
 219  1b8:   44 89 e7                mov    %r12d,%edi
 220  1bb:   41 0f bc cc             bsf    %r12d,%ecx
 221  1bf:   0f 44 c9                cmove  %ecx,%ecx
 222  1c2:   83 c1 01                add    $0x1,%ecx
 223  1c5:   d3 ef                   shr    %cl,%edi
 224  1c7:   41 0f b6 0c be          movzbl (%r14,%rdi,4),%ecx
 225  1cc:   c1 e6 07                shl    $0x7,%esi
 226  1cf:   c0 e9 07                shr    $0x7,%cl
 227  1d2:   44 0f b6 c1             movzbl %cl,%r8d
 228  1d6:   41 0f b6 4c be 01       movzbl 0x1(%r14,%rdi,4),%ecx
 229  1dc:   83 e1 03                and    $0x3,%ecx
 230  1df:   48 01 c9                add    %rcx,%rcx
 231  1e2:   4c 09 c1                or     %r8,%rcx
 232  1e5:   48 8d 1c ce             lea    (%rsi,%rcx,8),%rbx
 233  1e9:   48 01 c3                add    %rax,%rbx
 234  1ec:   e9 6f ff ff ff          jmpq   160 <bset_search_tree.isra.6+0x160>
 235  1f1:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)
 236  1f8:   89 cf                   mov    %ecx,%edi
 237  1fa:   8d 4c 3f 01             lea    0x1(%rdi,%rdi,1),%ecx
 238  1fe:   44 39 e9                cmp    %r13d,%ecx
 239  201:   72 f5                   jb     1f8 <bset_search_tree.isra.6+0x1f8>
 240  203:   eb c2                   jmp    1c7 <bset_search_tree.isra.6+0x1c7>
 241  205:   0f 1f 00                nopl   (%rax)
 242  208:   8b 42 1c                mov    0x1c(%rdx),%eax
 243  20b:   48 8d 54 c2 20          lea    0x20(%rdx,%rax,8),%rdx
 244  210:   e9 4b ff ff ff          jmpq   160 <bset_search_tree.isra.6+0x160>
 245  215:   48 8b 7d 98             mov    -0x68(%rbp),%rdi
 246  219:   44 89 e6                mov    %r12d,%esi
 247  21c:   e8 00 00 00 00          callq  221 <bset_search_tree.isra.6+0x221>
 248  221:   48 8b 75 a0             mov    -0x60(%rbp),%rsi
 249  225:   48 8b 10                mov    (%rax),%rdx
 250  228:   48 8b 0e                mov    (%rsi),%rcx
 251  22b:   81 e2 ff ff 0f 00       and    $0xfffff,%edx
 252  231:   81 e1 ff ff 0f 00       and    $0xfffff,%ecx
 253  237:   48 39 ca                cmp    %rcx,%rdx
 254  23a:   75 1c                   jne    258 <bset_search_tree.isra.6+0x258>
 255  23c:   48 8b 50 08             mov    0x8(%rax),%rdx
 256  240:   48 2b 56 08             sub    0x8(%rsi),%rdx
 257  244:   43 8d 44 24 01          lea    0x1(%r12,%r12,1),%eax
 258  249:   48 85 d2                test   %rdx,%rdx
 259  24c:   43 8d 0c 24             lea    (%r12,%r12,1),%ecx
 260  250:   0f 4f c1                cmovg  %ecx,%eax
 261  253:   e9 4b fe ff ff          jmpq   a3 <bset_search_tree.isra.6+0xa3>
 262  258:   48 29 ca                sub    %rcx,%rdx
 263  25b:   eb e7                   jmp    244 <bset_search_tree.isra.6+0x244>
 264  25d:   0f 1f 00                nopl   (%rax)

BcacheWiki: Bset_search_tree (last edited 2012-01-22 07:27:24 by Kent)