⛏️ index : haiku.git

/*
 * Copyright 2010, Haiku.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		based on previous work of Ankur Sethi
 *		Clemens Zeidler <haiku@clemens-zeidler.de>
 */

#include "CLuceneDataBase.h"

#include <Directory.h>
#include <File.h>
#include <TranslatorRoster.h>


// Debug tracing. While DEBUG_CLUCENE_DATABASE is defined, STRACE() forwards
// its arguments to printf() with an "FT: " prefix prepended to the format
// string; otherwise it expands to an empty statement.
#define DEBUG_CLUCENE_DATABASE
#ifdef DEBUG_CLUCENE_DATABASE
#include <stdio.h>
#	define STRACE(x...) printf("FT: " x)
#else
#	define STRACE(x...) ;
#endif


using namespace lucene::document;
using namespace lucene::util;


// Upper bound for the retry loops wrapped around the CLucene calls in this
// file (opening the reader/writer, deleting and adding documents).
const uint8 kCluceneTries = 10;


/*!	Converts the multibyte string \a str into a newly allocated,
	NUL-terminated wide character string using mbstowcs().

	\param str The multibyte string to convert, may be NULL.
	\return The converted string, which the caller has to free with delete[],
		or NULL if \a str is NULL or contains an invalid multibyte sequence.
*/
wchar_t* to_wchar(const char *str)
{
	if (str == NULL)
		return NULL;

	// A multibyte string never yields more wide characters than it has
	// bytes, so strlen() + 1 elements always leave room for the terminator
	// (this also covers the empty string, which needs exactly one element).
	size_t size = strlen(str) + 1;
	wchar_t *wStr = new wchar_t[size];

	// mbstowcs() reports invalid input as (size_t)-1
	if (mbstowcs(wStr, str, size) == (size_t)-1) {
		delete[] wStr;
		return NULL;
	}

	// defensive: guarantee termination even if the buffer got filled up
	wStr[size - 1] = L'\0';
	return wStr;
}


/*!	Sets up a write database located at \a databasePath.

	The database directory is created with mode 0755, and the path of the
	scratch file used while indexing is set to "temp_file" inside of it.
*/
CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
	:
	fDataBasePath(databasePath),
	fTempPath(databasePath),
	fIndexWriter(NULL)
{
	// Use the tracing macro instead of a bare printf() so the output follows
	// DEBUG_CLUCENE_DATABASE like every other message in this file.
	STRACE("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());

	// A constructor can't return the error, but don't drop it silently.
	if (create_directory(fDataBasePath.Path(), 0755) != B_OK)
		STRACE("Can't create directory %s\n", fDataBasePath.Path());

	fTempPath.Append("temp_file");
}


/*!	Removes the scratch file at fTempPath, which _IndexDocument() creates
	while translating documents to text.
*/
CLuceneWriteDataBase::~CLuceneWriteDataBase()
{
	// The result is ignored on purpose: the file only exists if a document
	// has been indexed, and a destructor has no way to report a failure.
	BEntry tempEntry(fTempPath.Path());
	tempEntry.Remove();
}


/*!	Reports the initialization status of the database.
	\return Always B_OK; no initialization state is tracked at the moment.
*/
status_t
CLuceneWriteDataBase::InitCheck()
{
	return B_OK;
}


/*!	Queues \a ref to be indexed by the next Commit().

	A ref that is already waiting in the add queue is not queued again.
	\return Always B_OK.
*/
status_t
CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
{
	bool alreadyQueued = false;

	std::vector<entry_ref>::const_iterator it = fAddQueue.begin();
	for (; it != fAddQueue.end(); ++it) {
		if (*it == ref) {
			alreadyQueued = true;
			break;
		}
	}

	if (!alreadyQueued)
		fAddQueue.push_back(ref);

	return B_OK;
}


/*!	Queues \a ref to be removed from the index by the next Commit().

	A ref that is already waiting in the delete queue is not queued again.
	\return Always B_OK.
*/
status_t
CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
{
	// Check if already in the queue. The bound has to be the size of the
	// delete queue itself: bounding the loop by the add queue either misses
	// duplicates or makes at() throw std::out_of_range.
	for (unsigned int i = 0; i < fDeleteQueue.size(); i++) {
		if (fDeleteQueue.at(i) == ref)
			return B_OK;
	}
	fDeleteQueue.push_back(ref);
	return B_OK;
}


/*!	Applies all queued removals and additions to the index.

	The documents of both queues are deleted from the index first, then the
	documents of the add queue are indexed. Both queues are empty afterwards,
	unless the index writer could not be opened, in which case the add queue
	is kept.

	\return B_OK if there was nothing to do or all documents were indexed,
		B_ERROR if the index writer could not be opened or a document could
		not be indexed. Indexing stops at the first document that fails.
*/
status_t
CLuceneWriteDataBase::Commit()
{
	if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
		return B_OK;
	STRACE("Commit\n");

	// Documents that are about to be added are deleted as well, presumably
	// so that re-indexing a file does not leave a stale copy behind.
	_RemoveDocuments(fAddQueue);
	_RemoveDocuments(fDeleteQueue);
	fDeleteQueue.clear();

	if (fAddQueue.size() == 0)
		return B_OK;

	fIndexWriter = _OpenIndexWriter();
	if (fIndexWriter == NULL)
		return B_ERROR;

	status_t status = B_OK;
	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
		if (!_IndexDocument(fAddQueue.at(i))) {
			status = B_ERROR;
			break;
		}
	}

	fAddQueue.clear();

	// _IndexDocument() leaves fIndexWriter NULL when it fails to reopen the
	// writer after an error, so it must not be dereferenced blindly.
	if (fIndexWriter != NULL) {
		fIndexWriter->close();
		delete fIndexWriter;
		fIndexWriter = NULL;
	}

	return status;
}


/*!	Opens an IndexWriter on the database directory, retrying up to
	kCluceneTries times when CLucene throws.

	A new index is created if none exists at the database path yet.
	\return The writer, owned by the caller, or NULL if every attempt failed.
*/
IndexWriter*
CLuceneWriteDataBase::_OpenIndexWriter()
{
	for (int attempt = 0; attempt < kCluceneTries; attempt++) {
		try {
			// only create the index when there is none to append to
			const bool createIndex
				= !IndexReader::indexExists(fDataBasePath.Path());

			IndexWriter* writer = new IndexWriter(fDataBasePath.Path(),
				&fStandardAnalyzer, createIndex);
			if (writer != NULL)
				return writer;
		} catch (CLuceneError &error) {
			// nothing has been allocated at this point, just try again
			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
		}
	}

	return NULL;
}


/*!	Opens an IndexReader on the database directory, retrying up to
	kCluceneTries times when CLucene throws.

	\return The reader, owned by the caller, or NULL if the database path or
		the index does not exist, or if every attempt failed.
*/
IndexReader*
CLuceneWriteDataBase::_OpenIndexReader()
{
	BEntry entry(fDataBasePath.Path(), NULL);
	if (!entry.Exists())
		return NULL;

	for (int attempt = 0; attempt < kCluceneTries; attempt++) {
		try {
			if (!IndexReader::indexExists(fDataBasePath.Path()))
				return NULL;

			IndexReader* reader = IndexReader::open(fDataBasePath.Path());
			if (reader != NULL)
				return reader;
		} catch (CLuceneError &error) {
			// nothing has been allocated at this point, just try again
			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
		}
	}

	return NULL;
}


/*!	Deletes all documents whose path matches one of \a docs from the index.

	Each deletion is tried up to kCluceneTries times; after a failed attempt
	the index reader is closed and reopened. Refs whose path can't be
	converted to a wide character string are skipped.

	\return true if the last attempted deletion succeeded, false if the
		reader could not be opened, a deletion failed for good, or nothing
		was deleted at all. Processing stops at the first document that can't
		be deleted.
*/
bool
CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
{
	IndexReader* reader = _OpenIndexReader();
	if (reader == NULL)
		return false;
	bool status = false;

	for (unsigned int i = 0; i < docs.size(); i++) {
		BPath path(&docs.at(i));
		wchar_t* wPath = to_wchar(path.Path());
		if (wPath == NULL)
			continue;

		// separate counter, so the document index is not shadowed
		for (int attempt = 0; attempt < kCluceneTries; attempt++) {
			status = _RemoveDocument(wPath, reader);
			if (status)
				break;

			// start over with a fresh reader
			reader->close();
			delete reader;
			reader = _OpenIndexReader();
			if (reader == NULL) {
				// status is false already, give up on this document
				break;
			}
		}
		delete[] wPath;

		if (!status)
			break;
	}

	// The reader is gone if reopening it failed in the retry loop.
	if (reader != NULL) {
		reader->close();
		delete reader;
	}

	return status;
}


bool
CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
{
	try {
		Term term(_T("path"), wPath);
		reader->deleteDocuments(&term);
	} catch (CLuceneError &error) {
		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
		return false;
	}
	return true;
}


bool
CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
{
	BPath path(&ref);

	BFile inFile, outFile;
	inFile.SetTo(path.Path(), B_READ_ONLY);
	if (inFile.InitCheck() != B_OK) {
		STRACE("Can't open inFile %s\n", path.Path());
		return false;
	}
	outFile.SetTo(fTempPath.Path(),
		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
	if (outFile.InitCheck() != B_OK) {
		STRACE("Can't open outFile %s\n", fTempPath.Path());
		return false;
	}

	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
		!= B_OK)
		return false;

	inFile.Unset(); 
	outFile.Unset();

	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
	wchar_t* wPath = to_wchar(path.Path());
	if (wPath == NULL)
		return false;

	Document *document = new Document;
	Field contentField(_T("contents"), fileReader,
		Field::STORE_NO | Field::INDEX_TOKENIZED);
	document->add(contentField);
	Field pathField(_T("path"), wPath,
		Field::STORE_YES | Field::INDEX_UNTOKENIZED);
	document->add(pathField);

	bool status = true;
	for (int i = 0; i < kCluceneTries; i++) {
		try {
			fIndexWriter->addDocument(document);
			STRACE("document added, retries: %i\n", i);
			break;
		} catch (CLuceneError &error) {
			STRACE("CLuceneError addDocument %s\n", error.what());
			fIndexWriter->close();
			delete fIndexWriter;
			fIndexWriter = _OpenIndexWriter();
			if (fIndexWriter == NULL) {
				status = false;
				break;
			}
		}
	}

	if (!status)
		delete document;
	delete[] wPath;
	return status;
}