Solr 4.3 is out, and it supports shard splitting: a shard can be split into two smaller sub-shards. For example, after splitting shard1 you get shard1_0 and shard1_1, and from then on index and search operations against shard1 are routed to these two sub-shards.
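
For reference, a split is kicked off through the Collections API's SPLITSHARD action; a request looks roughly like the one below (the host, port, collection name and shard name are just placeholders for your own setup):

    http://localhost:8983/solr/admin/collections?action=SPLITSHARD&collection=collection1&shard=shard1

The shard parameter names the shard to split; the two sub-shards get their names (shard1_0, shard1_1) and their hash ranges assigned automatically.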

The rough steps are as follows:
1. Two new cores are created on the node hosting shard1, each covering one of the new hash ranges (Solr uses hash ranges to decide which shard a document lives in, somewhat like consistent hashing; a simplified sketch follows this list).
2. The two new cores are put into a construction/maintenance state.
3. The leader treats the two new sub-shards as replicas (so incoming updates are forwarded to them as well).
4. Searches do not have to hit every shard, but the shards searched must together cover the complete hash ring.
5. The leader commits its data and starts splitting the index.
6. The split index pieces are placed into the new cores.
7. The Overseer creates replicas for the new sub-shards.
8. The old shard is marked as inactive (retired).
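
To make step 1 concrete, here is a minimal, self-contained sketch of hash-range routing. The Range class, the range bounds and the use of String.hashCode are all simplifications for illustration only; Solr's real router hashes the document id with murmurhash3 (as the splitter code below also does) and uses the ranges stored in the cluster state:

    //A toy model of hash-range routing: the parent shard's range is cut in half,
    //and a document id hashes into exactly one of the two halves.
    public class HashRangeSketch {
      static class Range {
        final int min, max;                 // inclusive bounds on the hash ring
        Range(int min, int max) { this.min = min; this.max = max; }
        boolean includes(int hash) { return hash >= min && hash <= max; }
      }

      public static void main(String[] args) {
        Range parent = new Range(0, Integer.MAX_VALUE);      // hypothetical range owned by shard1
        int mid = parent.min + (parent.max - parent.min) / 2;
        Range shard1_0 = new Range(parent.min, mid);          // first half  -> shard1_0
        Range shard1_1 = new Range(mid + 1, parent.max);      // second half -> shard1_1

        // Solr hashes the id with murmurhash3; String.hashCode is only a stand-in here.
        int hash = "doc-42".hashCode() & Integer.MAX_VALUE;   // clamp into the parent range
        String target = shard1_0.includes(hash) ? "shard1_0" : "shard1_1";
        System.out.println("doc-42 -> " + target);
      }
    }

During a split, shard1_0 and shard1_1 each receive one half of the parent's range, which is exactly what the range checks in the splitter below rely on.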

I had a look at the source of the index-splitting implementation; the main logic is in the split method of org.apache.solr.update.SolrIndexSplitter:

    //Get the leaves of the current shard index's top-level reader context (TopReaderContext), i.e. its segments.
    //Put simply: grab the per-segment readers so the index data can be read.
    List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    List<OpenBitSet[]> segmentDocSets = new ArrayList<OpenBitSet[]>(leaves.size());

    log.info("SolrIndexSplitter: partitions=" + numPieces + " segments="+leaves.size());
    //Walk the segments in order and record, per partition, which documents belong to it (as OpenBitSets)
    for (AtomicReaderContext readerContext : leaves) {
      assert readerContext.ordInParent == segmentDocSets.size();  //make sure the segments are processed in order
      //Re-partition the existing documents: each doc is assigned, by the hash of its id, to a slot on the newly divided ring, i.e. to one of the new sub-shard ranges
      OpenBitSet[] docSets = split(readerContext);
      segmentDocSets.add( docSets );
    }

    IndexReader[] subReaders = new IndexReader[leaves.size()];
    //Split the index into the configured number of pieces; each core (or path) receives one piece
    for (int partitionNumber=0; partitionNumber<numPieces; partitionNumber++) {
      log.info("SolrIndexSplitter: partition #" + partitionNumber + (ranges != null ? " range=" + ranges.get(partitionNumber) : ""));
      //Each subReader exposes one segment restricted to the documents that fall into this partition's range
      for (int segmentNumber = 0; segmentNumber<subReaders.length; segmentNumber++) {
        subReaders[segmentNumber] = new LiveDocsReader( leaves.get(segmentNumber), segmentDocSets.get(segmentNumber)[partitionNumber] );
      }

      boolean success = false;
      //Get an IndexWriter: reuse the target sub-core's writer if cores were given, otherwise create a new writer at the given path
      RefCounted<IndexWriter> iwRef = null;
      IndexWriter iw = null;
      if (cores != null) {
        SolrCore subCore = cores.get(partitionNumber);
        iwRef = subCore.getUpdateHandler().getSolrCoreState().getIndexWriter(subCore);
        iw = iwRef.get();
      } else {
        SolrCore core = searcher.getCore();
        String path = paths.get(partitionNumber);
        iw = SolrIndexWriter.create("SplittingIndexWriter"+partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path,
                                    core.getDirectoryFactory(), true, core.getSchema(),
                                    core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
      }

      try {
        //Merge the documents from the subReaders; deleted docs are dropped along the way, so no extra optimize is needed
        iw.addIndexes(subReaders);
        success = true;
      } finally {
        if (iwRef != null) {
          iwRef.decref();
        } else {
          if (success) {
            IOUtils.close(iw);
          } else {
            IOUtils.closeWhileHandlingException(iw);
          }
        }
      }

    }

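   //Assign every live document of one segment to the target partitions: hash the id term
   //and test the hash against each partition's range (or round-robin when no ranges are given).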
   OpenBitSet[] split(AtomicReaderContext readerContext) throws IOException {
    AtomicReader reader = readerContext.reader();
    OpenBitSet[] docSets = new OpenBitSet[numPieces];
    for (int i=0; i<docSets.length; i++) {
      docSets[i] = new OpenBitSet(reader.maxDoc());
    }
    Bits liveDocs = reader.getLiveDocs();
    //Get the terms of the document id field
    Fields fields = reader.fields();
    Terms terms = fields==null ? null : fields.terms(field.getName());
    TermsEnum termsEnum = terms==null ? null : terms.iterator(null);
    if (termsEnum == null) return docSets;

    BytesRef term = null;
    DocsEnum docsEnum = null;

    for (;;) {
      term = termsEnum.next();
      if (term == null) break;

      // Hash the term, and use the hash to re-assign the document to its slot on the new ring,
      // i.e. to its new sub-shard (a small standalone demo of this hash call follows the listing)
      CharsRef ref = new CharsRef(term.length);
      ref = field.getType().indexedToReadable(term, ref);
      int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
      docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
      for (;;) {
        int doc = docsEnum.nextDoc();
        if (doc == DocsEnum.NO_MORE_DOCS) break;
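        //With no explicit ranges, spread docs across the partitions round-robin;
        //otherwise set the doc in every partition whose hash range covers the id hash.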
        if (ranges == null) {
          docSets[currPartition].fastSet(doc);
          currPartition = (currPartition + 1) % numPieces;
        } else  {
          for (int i=0; i<rangesArr.length; i++) {      
            if (rangesArr[i].includes(hash)) {
              docSets[i].fastSet(doc);
            }
          }
        }
      }
    }

    return docSets;
  }
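
As a closing aside, the routing hash the splitter computes can be reproduced outside of it with the same utility it calls, org.apache.solr.common.util.Hash (from the Solr jars); comparing the result against the sub-shard hash ranges published in clusterstate.json tells you where a given document will land after the split. The id value below is just an example, and this assumes a plain string id field, where the indexed and readable forms coincide:

    import org.apache.solr.common.util.Hash;

    public class RoutingHashDemo {
      public static void main(String[] args) {
        String id = "doc-42";   // hypothetical document id
        // Same call as in SolrIndexSplitter.split(): murmurhash3_x86_32 over the readable id, seed 0
        int hash = Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
        System.out.println("hash of " + id + " = " + Integer.toHexString(hash));
      }
    }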