Reputation: 133
I've this issue in my project. I read my .xlsx excel file using Apache Poi and I want to index them in my Solr core. I use SolrInputDocument to index reading file. Here is my java codes
package org.solr;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
public class PoiJava {
private static final String fileName="C:\\Users\\FTK1187\\Desktop\\E-Archive - Copy\\TableArchive.xlsx";
public static void main(String Args[]) throws SolrServerException {
List dataList=getArchiveData();
}
private static List getArchiveData() throws SolrServerException {
List dataList =new ArrayList();
FileInputStream excelFile=null;
try {
excelFile = new FileInputStream(new File(fileName));
Workbook workbook = new XSSFWorkbook(excelFile);
Sheet datatypeSheet = workbook.getSheetAt(0);
Iterator<Row> iterator = datatypeSheet.iterator();
String urlString="http://localhost:8983/solr/archiveCore";
SolrClient solr=new HttpSolrClient.Builder(urlString).build();
SolrInputDocument document=new SolrInputDocument();
if(!document.isEmpty())
{
solr.deleteByQuery("*");
solr.commit();
}
while (iterator.hasNext()) {
Row currentRow = iterator.next();
Iterator<Cell> cellIterator = currentRow.iterator();
while (cellIterator.hasNext()) {
Cell currentCell = cellIterator.next();
//getCellTypeEnum shown as deprecated for version 3.15
//getCellTypeEnum ill be renamed to getCellType starting from version 4.0
if (currentCell.getCellTypeEnum() == CellType.STRING) {
//System.out.println(currentCell.getStringCellValue());
for(int i=0;i<currentRow.getLastCellNum();i++)
{
if(currentCell.getColumnIndex()==1)
{
document.addField("NameAdded", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==2)
{
document.addField("DateAdded", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==3)
{
document.addField("NameModified", "");
}
else if(currentCell.getColumnIndex()==4)
{
document.addField("DateModified", "");
}
else if(currentCell.getColumnIndex()==5)
{
document.addField("strSO", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==6)
{
document.addField("strCust", "");
}
else if(currentCell.getColumnIndex()==7)
{
document.addField("strOperator", "");
}
else if(currentCell.getColumnIndex()==8)
{
document.addField("PackName", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==9)
{
document.addField("DocName", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==10)
{
document.addField("DocType", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==11)
{
document.addField("extType", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==12)
{
document.addField("FileName", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==13)
{
document.addField("FilePath", currentCell.getStringCellValue());
}
else if(currentCell.getColumnIndex()==14)
{
document.addField("NameDeleted", "");
}
else if(currentCell.getColumnIndex()==15)
{
document.addField("DateDeleted", "");
}
else if(currentCell.getColumnIndex()==16)
{
document.addField("intRev", currentCell.getStringCellValue());
}
}
} else if (currentCell.getCellTypeEnum() == CellType.NUMERIC) {
//System.out.println(currentCell.getNumericCellValue());
for(int k=0;k<currentRow.getLastCellNum();k++)
{
if(currentCell.getColumnIndex()==0)
{
document.addField("id", currentCell.getNumericCellValue());
}
}
}
UpdateResponse response=solr.add(document);
solr.commit();
}
//System.out.println();
System.out.println(document.getField("id"));
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}
catch(IOException e) {
e.printStackTrace();
}
return dataList;
}
}
So when I'm running my project it gives me this error.
Exception in thread "main" org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteSolrException: Error from server at http://localhost:8983/solr/archiveCore: Document is missing mandatory uniqueKey field: id
at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:610)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:279)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:268)
at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:149)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:173)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:138)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:152)
at org.solr.PoiJava.getArchiveData(PoiJava.java:148)
at org.solr.PoiJava.main(PoiJava.java:33)
When I'm indexing files using SimplePostTool there is not error like that but I want to update my core in my web page.
Upvotes: 0
Views: 571
Reputation: 623
You probably have in your schema a field set as the unique key like this:
<uniqueKey>id</uniqueKey>
The problem is that when you upload a doc, in this case via Apache POI, you are not sending a value for that unique field.
You have a couple options:
<copyField source="excel_guaranteed_unique" dest="id"/>
As you have the actual document, you could just add a UUID to the "id" field.
Create a unique field like a UUID updating your RequestHandlder, like this:
<updateRequestProcessorChain name="uuid" >
<processor class="solr.UUIDUpdateProcessorFactory">
<str name="fieldName">id</str>
</processor>
...
</updateRequestProcessorChain>
...
<requestHandler name="/update" class="solr.UpdateRequestHandler">
<lst name="defaults">
<str name="update.chain">uuid</str>
</lst>
</requestHandler>
You also need to update the extract handler:
<requestHandler name="/update/extract"
startup="lazy"
class="solr.extraction.ExtractingRequestHandler" >
<lst name="defaults">
...
<str name="update.chain">uuid</str>
</lst>
Upvotes: 2