7 #ifndef DOCUMENTSWRITER_H
8 #define DOCUMENTSWRITER_H
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
Definition: DocumentsWriter.h:497
int32_t blockSize
Definition: DocumentsWriter.h:508
virtual ByteArray getByteBlock(bool trackAllocations)
Allocate another byte[] from the shared pool.
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:502
ByteBlockAllocator(const DocumentsWriterPtr &docWriter, int32_t blockSize)
virtual void recycleByteBlocks(Collection< ByteArray > blocks, int32_t start, int32_t end)
Return byte[]'s to the pool.
virtual void recycleByteBlocks(Collection< ByteArray > blocks)
Collection< ByteArray > freeByteBlocks
Definition: DocumentsWriter.h:509
virtual ~ByteBlockAllocator()
Definition: ByteBlockPool.h:54
This is the current indexing chain: DocConsumer / DocConsumerPerThread --> code: DocFieldProcessor / ...
Definition: DocumentsWriter.h:447
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)
virtual ~DefaultIndexingChain()
Definition: DocumentsWriter.h:356
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:367
String maxTermPrefix
Definition: DocumentsWriter.h:371
SimilarityPtr similarity
Definition: DocumentsWriter.h:368
virtual bool testPoint(const String &name)
Only called by asserts.
DocumentPtr doc
Definition: DocumentsWriter.h:370
AnalyzerPtr analyzer
Definition: DocumentsWriter.h:365
int32_t maxFieldLength
Definition: DocumentsWriter.h:366
int32_t docID
Definition: DocumentsWriter.h:369
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:361
Consumer returns this on each doc. This holds any state that must be flushed synchronized "in docID o...
Definition: DocumentsWriter.h:402
DocWriterPtr next
Definition: DocumentsWriter.h:407
virtual void setNext(const DocWriterPtr &next)
virtual int64_t sizeInBytes()=0
int32_t docID
Definition: DocumentsWriter.h:411
This class accepts multiple added documents and directly writes a single segment file....
Definition: DocumentsWriter.h:54
void setMaxFieldLength(int32_t maxFieldLength)
int32_t nextDocID
Definition: DocumentsWriter.h:65
DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr &doc, const TermPtr &delTerm)
Returns a free (idle) ThreadState that may be used for indexing this one document....
ByteBlockAllocatorPtr perDocAllocator
Definition: DocumentsWriter.h:178
void recycleIntBlocks(Collection< IntArray > blocks, int32_t start, int32_t end)
static const int32_t BYTES_PER_DEL_QUERY
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition: DocumentsWriter.h:132
bool bufferDeleteQueries(Collection< QueryPtr > queries)
void finishDocument(const DocumentsWriterThreadStatePtr &perThread, const DocWriterPtr &docWriter)
Does the synchronized work to finish/flush the inverted document.
SegmentWriteStatePtr flushState
Definition: DocumentsWriter.h:106
String closeDocStore()
Closes the current open doc stores and returns the doc store segment name. This returns null if there ...
double getRAMBufferSizeMB()
void setRAMBufferSizeMB(double mb)
Set how much RAM we can use before flushing.
int32_t numDocsInStore
Definition: DocumentsWriter.h:160
String getSegment()
Get current segment name we are writing.
PerDocBufferPtr newPerDocBuffer()
Create and return a new DocWriterBuffer.
int32_t getDocStoreOffset()
Returns the doc offset into the shared doc store for the current buffered docs.
static const int32_t INT_BLOCK_SHIFT
Initial chunk size of the shared int[] blocks used to store postings data.
Definition: DocumentsWriter.h:148
ByteBlockAllocatorPtr byteBlockAllocator
Definition: DocumentsWriter.h:177
SimilarityPtr similarity
Definition: DocumentsWriter.h:167
HashSet< String > _closedFiles
Definition: DocumentsWriter.h:172
DirectoryPtr directory
Definition: DocumentsWriter.h:156
void doAfterFlush()
Reset after a flush.
static const int32_t CHAR_BLOCK_SHIFT
Initial chunk size of the shared char[] blocks used to store term text.
Definition: DocumentsWriter.h:141
bool bufferDeleteTerms(Collection< TermPtr > terms)
int32_t getMaxBufferedDocs()
MapThreadDocumentsWriterThreadState threadBindings
Definition: DocumentsWriter.h:71
int64_t waitQueueResumeBytes
Definition: DocumentsWriter.h:90
int32_t getNumDocsInRAM()
Returns how many docs are currently buffered in RAM.
BufferedDeletesPtr deletesFlushed
Deletes done before the last flush; these are still kept on abort.
Definition: DocumentsWriter.h:82
bool aborting
Definition: DocumentsWriter.h:74
int64_t numBytesAlloc
Definition: DocumentsWriter.h:180
int64_t numBytesUsed
Definition: DocumentsWriter.h:181
void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms)
bool timeToFlushDeletes()
String docStoreSegment
Definition: DocumentsWriter.h:59
void recycleCharBlocks(Collection< CharArray > blocks, int32_t numBlocks)
static const int32_t MAX_TERM_LENGTH
Definition: DocumentsWriter.h:145
static IndexingChainPtr getDefaultIndexingChain()
bool pauseAllThreads()
Returns true if an abort is in progress.
HashSet< String > closedFiles()
IndexingChainPtr indexingChain
Definition: DocumentsWriter.h:157
static const int32_t BYTE_BLOCK_MASK
Definition: DocumentsWriter.h:137
bool closed
Definition: DocumentsWriter.h:102
void removeOpenFile(const String &name)
int64_t ramBufferSize
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead.
Definition: DocumentsWriter.h:88
int32_t pauseThreads
Definition: DocumentsWriter.h:73
int32_t maxBufferedDocs
Flush at this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead.
Definition: DocumentsWriter.h:97
virtual void initialize()
Called directly after instantiation to create objects that depend on this object being fully construc...
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:165
bool updateDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer, const TermPtr &delTerm)
void addOpenFile(const String &name)
void abort()
Called if we hit an exception at a bad time (when updating the index files) and must discard all curr...
bool updateDocument(const TermPtr &t, const DocumentPtr &doc, const AnalyzerPtr &analyzer)
int32_t flush(bool _closeDocStore)
Flush all pending docs to a new segment.
int32_t maxBufferedDeleteTerms
The max number of delete terms that can be buffered before they must be flushed to disk.
Definition: DocumentsWriter.h:85
static const int32_t BYTES_PER_DEL_DOCID
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER)....
Definition: DocumentsWriter.h:127
void setInfoStream(const InfoStreamPtr &infoStream)
If non-null, various details of indexing are printed here.
void addDeleteTerm(const TermPtr &term, int32_t docCount)
int64_t waitQueuePauseBytes
Definition: DocumentsWriter.h:89
int32_t flushedDocCount
How many docs already flushed to index.
Definition: DocumentsWriter.h:100
bool applyDeletes(const SegmentInfosPtr &infos)
HashSet< String > abortedFiles()
HashSet< String > _openFiles
Definition: DocumentsWriter.h:171
Collection< IntArray > freeIntBlocks
Definition: DocumentsWriter.h:108
bool bufferDeleteTerm(const TermPtr &term)
bool addDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer)
Returns true if the caller (IndexWriter) should now flush.
DocConsumerPtr consumer
Definition: DocumentsWriter.h:169
static const int32_t CHAR_BLOCK_SIZE
Definition: DocumentsWriter.h:142
static const int32_t INT_BLOCK_MASK
Definition: DocumentsWriter.h:150
void initSegmentName(bool onlyDocStore)
void bytesUsed(int64_t numBytes)
int32_t getNumBufferedDeleteTerms()
int64_t freeTrigger
If we've allocated 5% over our RAM budget, we then free down to 95%.
Definition: DocumentsWriter.h:93
static const int32_t CHAR_BLOCK_MASK
Definition: DocumentsWriter.h:143
void updateFlushedDocCount(int32_t n)
void bytesAllocated(int64_t numBytes)
String segment
Definition: DocumentsWriter.h:158
static const int32_t INT_NUM_BYTE
Definition: DocumentsWriter.h:115
static const int32_t BYTE_BLOCK_SIZE
Definition: DocumentsWriter.h:136
int32_t getFlushedDocCount()
static const int32_t POINTER_NUM_BYTE
Definition: DocumentsWriter.h:114
Collection< DocumentsWriterThreadStatePtr > threadStates
Definition: DocumentsWriter.h:70
static const int32_t CHAR_NUM_BYTE
Definition: DocumentsWriter.h:116
static const int32_t OBJECT_HEADER_BYTES
Coarse estimates used to measure RAM usage of buffered deletes.
Definition: DocumentsWriter.h:113
void remapDeletes(const SegmentInfosPtr &infos, Collection< Collection< int32_t > > docMaps, Collection< int32_t > delCounts, const OneMergePtr &merge, int32_t mergeDocCount)
Called whenever a merge has completed and the merged segments had deletions.
void waitReady(const DocumentsWriterThreadStatePtr &state)
bool bufferIsFull
Definition: DocumentsWriter.h:163
virtual ~DocumentsWriter()
TermPtr lastDeleteTerm
Definition: DocumentsWriter.h:184
void setFlushedDocCount(int32_t n)
HashSet< String > openFiles()
Returns Collection of files in use by this instance, including any flushed segments.
void balanceRAM()
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds c...
bool flushPending
Definition: DocumentsWriter.h:162
void setMaxBufferedDocs(int32_t count)
Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
int32_t numDocsInRAM
Definition: DocumentsWriter.h:66
Collection< CharArray > freeCharBlocks
Definition: DocumentsWriter.h:109
static const int32_t INT_BLOCK_SIZE
Definition: DocumentsWriter.h:149
bool bufferDeleteQuery(const QueryPtr &query)
BufferedDeletesPtr deletesInRAM
Deletes done after the last flush; these are discarded on abort.
Definition: DocumentsWriter.h:79
void initFlushState(bool onlyDocStore)
int32_t docStoreOffset
Definition: DocumentsWriter.h:63
MapTermNum getBufferedDeleteTerms()
static const int32_t BYTE_BLOCK_SHIFT
Initial chunk size of the shared byte[] blocks used to store postings data.
Definition: DocumentsWriter.h:135
DocFieldProcessorPtr docFieldProcessor
Definition: DocumentsWriter.h:76
static const int32_t MAX_THREAD_STATE
Max # ThreadState instances; if there are more threads than this they share ThreadStates.
Definition: DocumentsWriter.h:69
static const int32_t BYTES_PER_DEL_TERM
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition: DocumentsWriter.h:123
HashSet< String > _abortedFiles
List of files that were written before last abort()
Definition: DocumentsWriter.h:105
IntArray getIntBlock(bool trackAllocations)
HashSet< String > getFlushedFiles()
int32_t maxFieldLength
Definition: DocumentsWriter.h:166
static const int32_t BYTE_BLOCK_NOT_MASK
Definition: DocumentsWriter.h:138
void addDeleteQuery(const QueryPtr &query, int32_t docID)
String getDocStoreSegment()
Returns the current doc store segment we are writing to.
DocumentsWriter(const DirectoryPtr &directory, const IndexWriterPtr &writer, const IndexingChainPtr &indexingChain)
WaitQueuePtr waitQueue
Definition: DocumentsWriter.h:174
SkipDocWriterPtr skipDocWriter
Definition: DocumentsWriter.h:175
bool setFlushPending()
Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter...
bool hasProx()
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false.
bool applyDeletes(const IndexReaderPtr &reader, int32_t docIDStart)
bool checkDeleteTerm(const TermPtr &term)
int32_t getMaxBufferedDeleteTerms()
void message(const String &message)
void addDeleteDocID(int32_t docID)
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a docum...
void createCompoundFile(const String &segment)
Build compound file for the segment we just flushed.
static const int32_t PER_DOC_BLOCK_SIZE
Definition: DocumentsWriter.h:152
void setSimilarity(const SimilarityPtr &similarity)
int64_t freeLevel
Definition: DocumentsWriter.h:94
The IndexingChain must define the getChain(DocumentsWriter) method which returns the DocConsumer that...
Definition: DocumentsWriter.h:423
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)=0
Base class for all Lucene classes.
Definition: LuceneObject.h:31
RAMFile buffer for DocWriters.
Definition: DocumentsWriter.h:381
PerDocBuffer(const DocumentsWriterPtr &docWriter)
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:386
void recycle()
Recycle the bytes used.
virtual ByteArray newBuffer(int32_t size)
Allocate bytes used from shared pool.
File used as buffer in RAMDirectory.
Definition: RAMFile.h:15
Definition: DocumentsWriter.h:457
virtual int64_t sizeInBytes()
Definition: DocumentsWriter.h:469
WaitQueue(const DocumentsWriterPtr &docWriter)
int32_t nextWriteDocID
Definition: DocumentsWriter.h:481
void writeDocument(const DocWriterPtr &doc)
bool add(const DocWriterPtr &doc)
int32_t numWaiting
Definition: DocumentsWriter.h:483
int32_t nextWriteLoc
Definition: DocumentsWriter.h:482
int64_t waitingBytes
Definition: DocumentsWriter.h:484
Collection< DocWriterPtr > waiting
Definition: DocumentsWriter.h:480
DocumentsWriterWeakPtr _docWriter
Definition: DocumentsWriter.h:474
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< InfoStream > InfoStreamPtr
Definition: LuceneTypes.h:532
boost::shared_ptr< SkipDocWriter > SkipDocWriterPtr
Definition: LuceneTypes.h:226
boost::shared_ptr< Query > QueryPtr
Definition: LuceneTypes.h:420
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition: LuceneTypes.h:160
boost::shared_ptr< OneMerge > OneMergePtr
Definition: LuceneTypes.h:192
boost::shared_ptr< DocFieldProcessor > DocFieldProcessorPtr
Definition: LuceneTypes.h:115
boost::shared_ptr< DocumentsWriter > DocumentsWriterPtr
Definition: LuceneTypes.h:123
boost::shared_ptr< SegmentWriteState > SegmentWriteStatePtr
Definition: LuceneTypes.h:222
boost::shared_ptr< DocConsumer > DocConsumerPtr
Definition: LuceneTypes.h:106
boost::shared_ptr< Similarity > SimilarityPtr
Definition: LuceneTypes.h:435
boost::shared_ptr< Analyzer > AnalyzerPtr
Definition: LuceneTypes.h:20
boost::weak_ptr< DocumentsWriter > DocumentsWriterWeakPtr
Definition: LuceneTypes.h:123
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
boost::shared_ptr< BufferedDeletes > BufferedDeletesPtr
Definition: LuceneTypes.h:87
boost::shared_ptr< IndexingChain > IndexingChainPtr
Definition: LuceneTypes.h:156
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< ByteBlockAllocator > ByteBlockAllocatorPtr
Definition: LuceneTypes.h:88
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< DocWriter > DocWriterPtr
Definition: LuceneTypes.h:125
boost::shared_ptr< DocumentsWriterThreadState > DocumentsWriterThreadStatePtr
Definition: LuceneTypes.h:124
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
boost::shared_ptr< PerDocBuffer > PerDocBufferPtr
Definition: LuceneTypes.h:199
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
boost::shared_ptr< WaitQueue > WaitQueuePtr
Definition: LuceneTypes.h:265
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160