¿Por qué Lucene.NET causa OutOfMemoryException al indexar archivos grandes?
apache lucene net (1)
He agregado el código mencionado anteriormente para IndexWriter
.
Yo he puesto
writer.SetRAMBufferSizeMB(32);
writer.MergeFactor = 1000;
writer.SetMaxFieldLength(Int32.MaxValue);
writer.UseCompoundFile = false;
toda la propiedad para evitar OutOfMemoryException(OOMException)
.
Aquí en este código en línea writer.AddDocument(document);
muestra la excepción OOM.
¿Puedes guiarme por qué soy este error?
¿Alguien puede ayudarme a resolver esto?
La configuración de mi máquina:
Tipo de sistema: sistema operativo de 64 bits.
RAM: 4 GB (3.86 GB utilizables)
Procesador: Intel i5 - CPU 3230M a 2.60 GHz
using System;
using System.Data.SqlClient;
using Lucene.Net.Documents;
using System.Data;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.QueryParsers;
namespace ConsoleApplication1
{
class Program
{
static String searchTerm = "";
static void Main(string[] args) {
/**
* This will create dataset according to
* connectingString and query
**/
Console.WriteLine("Connecting to Sql database server.");
String connectionString = "Data Source=proxy-pc;Initial Catalog=Snomed; User ID=SA;password=admin";
String query = "SELECT * FROM DESCRIPTION";
String INDEX_DIRECTORY = "c://DatabaseIndex";
Console.WriteLine("Creating dataset.");
DataSet dataSet = createDataset(connectionString, query);
Console.WriteLine("Created dataset successfully.");
Console.WriteLine("Creating document.");
Document document = createDocument(dataSet);
Console.WriteLine("Created document successfully.");
var version = Lucene.Net.Util.Version.LUCENE_30;
var length = Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED;
Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version);
Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(@INDEX_DIRECTORY));
Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, length);
writer.SetMergeScheduler(new Lucene.Net.Index.SerialMergeScheduler());
writer.SetRAMBufferSizeMB(32);
writer.MergeFactor = 1000;
writer.SetMaxFieldLength(Int32.MaxValue);
writer.UseCompoundFile = false;
Console.WriteLine("Before Adding document");
**writer.AddDocument(document); **
Console.WriteLine("Indexing...");
writer.Optimize();
writer.Dispose();
Console.WriteLine("Indexing finished");
if (searchTerm == "")
{
searchTerm = "(keyword)";
}
Console.WriteLine("Searching ''" + searchTerm + "''...");
var occurance = searchKeyword(INDEX_DIRECTORY, version, searchTerm);
if (occurance != -1)
{
Console.WriteLine("Your search found : " + occurance);
}
else
{
Console.WriteLine("Invalid index directory.");
}
Console.Read();
}
/**
* Method works as a searcher
**/
private static int searchKeyword(String index_Directory_Path, Lucene.Net.Util.Version version, String searchWord) {
if (index_Directory_Path != null)
{
var standAnalyzer = new StandardAnalyzer(version);
IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(index_Directory_Path));
// parse the query, "term" is the default field to search
var parser = new QueryParser(version, "term", standAnalyzer);
Query searchQuery = parser.Parse(searchWord);
// search
TopDocs hits = searcher.Search(searchQuery, 100);
var total = hits.TotalHits;
return total;
}
else
{
return -1;
}
}
static DataSet createDataset(String connectionString, String query) {
DataSet ds = new DataSet();
using (SqlConnection connection = new SqlConnection(connectionString))
using (SqlCommand command = new SqlCommand(query, connection))
using (SqlDataAdapter adapter = new SqlDataAdapter(command))
{
adapter.Fill(ds);
}
return ds;
}
static Lucene.Net.Documents.Document createDocument(DataSet dataSet) {
Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
using (dataSet)
{
foreach (DataTable table in dataSet.Tables)
{
foreach (DataRow row in table.Rows)
{
String id = row["id"].ToString();
String rTime = row["rTime"].ToString();
String active = row["active"].ToString();
String mId = row["mId"].ToString();
String cId = row["cId"].ToString();
String lCode = row["lCode"].ToString();
String tId = row["tId"].ToString();
String detail = row["detail"].ToString();
String sId = row["sId"].ToString();
Field idField = new Field("id", id, Field.Store.YES, Field.Index.ANALYZED);
Field rTimeField = new Field("rTime", rTime, Field.Store.YES, Field.Index.ANALYZED);
Field activeField = new Field("active", active, Field.Store.YES, Field.Index.ANALYZED);
Field mIdField = new Field("mId", mId, Field.Store.YES, Field.Index.ANALYZED);
Field cIdField = new Field("cId", cId, Field.Store.YES, Field.Index.ANALYZED);
Field lCodeField = new Field("lCode", lCode, Field.Store.YES, Field.Index.ANALYZED);
Field tIdField = new Field("tId", tId, Field.Store.YES, Field.Index.ANALYZED);
Field detailField = new Field("detail", detail, Field.Store.YES, Field.Index.ANALYZED);
Field sIdField = new Field("sId", sId, Field.Store.YES, Field.Index.ANALYZED);
doc.Add(idField);
doc.Add(rTimeField);
doc.Add(activeField);
doc.Add(mIdField);
doc.Add(cIdField);
doc.Add(lCodeField);
doc.Add(tIdField);
doc.Add(detailField);
doc.Add(sIdField);
}
}
}
return doc;
}
}
}
Parece que está agregando toda la base de datos como un documento único.
¿Has intentado agregar cada fila como un documento separado? Tal vez podría cambiar "createDocument" por "createDocuments" y obtener un solo documento de Lucene.Net por fila. Eso dejaría la mayor parte de tu código actual sin cambios ...
Espero que esto ayude,