- •Оглавление
- •Введение
- •Описание платформы Java
- •Платформа Java
- •Виртуальная машина Java
- •Особенности языка Java
- •Классификация платформ Java
- •Практическая часть
- •Назначение приложения
- •Общие сведения о работе приложения
- •Структура таблиц базы данных приложения
- •Описание разработки приложения
- •Тестирование и отладка
- •Заключение
- •Список литературы
- •Приложение А
- •Класс работы с файлом настроек. Приложение Б.1
- •Приложение Б.2
Приложение Б.2
Функция фильтрации
/**
 * Reduces a list of raw words to new words in (approximate) base form and
 * counts how often each of them occurs.
 *
 * <p>Processing steps:
 * <ol>
 *   <li>Reload {@code WordBase} through {@code GetWordBase()}.</li>
 *   <li>Pre-filter: keep words longer than two characters that are contained
 *       neither in {@code WordBase} nor in {@code Bases[0]}.</li>
 *   <li>Suffix filters (each enabled by a boolean setting read from
 *       {@code SettingFile}): {@code F_ed}, {@code F_s}, {@code F_ing},
 *       {@code F_ly}, {@code F_er}. A word is replaced by its stem when the
 *       stem is found in {@code Bases[1]}; it is skipped when its stem is
 *       already present in the pre-filtered list or in {@code WordBase};
 *       otherwise it is kept unchanged.</li>
 *   <li>Sort the result and collapse equal neighbours into one row per word.</li>
 *   <li>For every resulting word, replace derived forms of it that are stored
 *       in {@code WordBase} with the word itself (via {@code Replace}).</li>
 * </ol>
 *
 * <p>Side effects: assigns {@code WordBase} and {@code TotalCount}, may modify
 * {@code WordBase} through {@code Replace}, and writes progress messages via
 * {@code System.out} and {@code out}.
 *
 * <p>NOTE(review): {@code Bases[0]} is presumably a list of proper names and
 * {@code Bases[1]} a dictionary of base forms (judging by the original inline
 * comments) - confirm against the code that fills {@code Bases}.
 *
 * @param Dirty raw words to filter; must not be {@code null}
 * @return one {@code Object[3]} row per distinct word: index 0 holds the word,
 *         index 1 holds its occurrence count ({@code Integer}); index 2 is
 *         left {@code null} by this method
 */
public ArrayList<Object[]> Filter(ArrayList<String> Dirty){
    // Words that survived the pre-filter (source for the suffix filters).
    ArrayList<String> CleanArray = new ArrayList<String>();
    // Words after all suffix filters; duplicates are still present.
    ArrayList<String> FinalList = new ArrayList<String>();
    // Final result: one row per distinct word with its count.
    ArrayList<Object[]> RatedFinalList = new ArrayList<Object[]>();

    WordBase = GetWordBase();

    for (String CurWord : Dirty){
        // Drop very short words, already known words and words listed in Bases[0].
        if (CurWord.length() > 2
                && !WordBase.contains(CurWord)
                && !Bases[0].contains(CurWord)){
            CleanArray.add(CurWord);
        }
    }
    System.out.println("Count before filters: "+CleanArray.size());

    // The filter switches do not change while this method runs, so read them
    // once instead of once (or more) per word.
    final boolean FilterEd = Boolean.parseBoolean(Settings.GetProps(SettingFile, "F_ed"));
    final boolean FilterS = Boolean.parseBoolean(Settings.GetProps(SettingFile, "F_s"));
    final boolean FilterIng = Boolean.parseBoolean(Settings.GetProps(SettingFile, "F_ing"));
    final boolean FilterLy = Boolean.parseBoolean(Settings.GetProps(SettingFile, "F_ly"));
    final boolean FilterEr = Boolean.parseBoolean(Settings.GetProps(SettingFile, "F_er"));

    for (String CurString : CleanArray){
        final int Len = CurString.length();

        if (FilterEd && CurString.endsWith("ed") && Len > 3){
            // Past forms.
            String CutD = CurString.substring(0, Len-1);        // compare - compared
            String CutED = CurString.substring(0, Len-2);       // enter - entered
            String CutDoubles = CurString.substring(0, Len-3);  // stop - stopped
            if (!CleanArray.contains(CutD) && !WordBase.contains(CutD)){
                if (Bases[1].contains(CutD) && CutD.length() > 2){
                    FinalList.add(CutD);
                } else if (!CleanArray.contains(CutED) && !WordBase.contains(CutED)){
                    if (Bases[1].contains(CutED) && CutED.length() > 2){
                        FinalList.add(CutED);
                    } else if (CurString.charAt(Len-3) == CurString.charAt(Len-4)){
                        // Doubled consonant before "ed".
                        if (!CleanArray.contains(CutDoubles) && !WordBase.contains(CutDoubles)){
                            if (Bases[1].contains(CutDoubles) && CutDoubles.length() > 2){
                                FinalList.add(CutDoubles);
                            } else {
                                FinalList.add(CurString);
                            }
                        }
                    } else {
                        FinalList.add(CurString);
                    }
                }
            }
        } else if (FilterS && CurString.endsWith("s")){
            // Third person singular and plural forms.
            String CutS = CurString.substring(0, Len-1);
            String CutES = CurString.substring(0, Len-2);
            String CutIES = CurString.substring(0, Len-3);
            if (CurString.endsWith("sses")){
                // bosses -> boss
                if (!CleanArray.contains(CutES) && !WordBase.contains(CutES)){
                    if (Bases[1].contains(CutES) && CutES.length() > 2){
                        FinalList.add(CutES);
                    } else {
                        FinalList.add(CurString);
                    }
                }
            } else if (CurString.endsWith("ies")
                    && (CleanArray.contains(CutIES+"y") || Bases[1].contains(CutIES+"y"))){
                // categories -> category
                FinalList.add(CutIES+"y");
            } else if (!CleanArray.contains(CutS) && !WordBase.contains(CutS)){
                if (Bases[1].contains(CutS) && CutS.length() > 2){
                    FinalList.add(CutS); // gets -> get
                } else {
                    FinalList.add(CurString);
                }
            }
        } else if (FilterIng && CurString.endsWith("ing")){
            String CutIng = CurString.substring(0, Len-3);
            if (!CleanArray.contains(CutIng) && !WordBase.contains(CutIng)){
                if (CutIng.length() > 2
                        && (CleanArray.contains(CutIng+"e") || Bases[1].contains(CutIng+"e"))){
                    // making -> make (skipped when "make" is already known)
                    if (!WordBase.contains(CutIng+"e")){
                        FinalList.add(CutIng+"e");
                    }
                } else if (Bases[1].contains(CutIng) && CutIng.length() > 1){
                    FinalList.add(CutIng);
                } else if (Len > 6 && CurString.charAt(Len-4) == CurString.charAt(Len-5)){
                    // Doubled consonant before "ing" (getting -> get). The shorter
                    // stem is built only here: for the bare word "ing" a
                    // substring(0, Len-4) would throw StringIndexOutOfBoundsException.
                    String CuttIng = CurString.substring(0, Len-4);
                    if (!WordBase.contains(CuttIng)){
                        FinalList.add(CuttIng);
                    }
                } else {
                    // No known stem: keep the word as is. This also covers long
                    // words without a doubled consonant, which must not be lost.
                    FinalList.add(CurString);
                }
            }
        } else if (FilterLy && Len > 4 && CurString.endsWith("ly")){
            // Adverbs: basically -> basic, otherwise strip "ly".
            String Cut = CurString.endsWith("cally")
                    ? CurString.substring(0, Len-4)
                    : CurString.substring(0, Len-2);
            if (!CleanArray.contains(Cut) && !WordBase.contains(Cut)){
                if (Bases[1].contains(Cut) && Cut.length() > 2){
                    FinalList.add(Cut);
                } else {
                    FinalList.add(CurString);
                }
            }
        } else if (FilterEr && Len > 4
                && (CurString.endsWith("er") || CurString.endsWith("est"))){
            // Comparative / superlative: big - bigger - biggest.
            int SuffixLen = CurString.endsWith("er") ? 2 : 3;
            int Pivot = Len - SuffixLen - 1; // index of the last character before the suffix
            String Stem = CurString.substring(0, Len - SuffixLen);
            String ShortStem = CurString.substring(0, Pivot);
            if (!CleanArray.contains(Stem) && !WordBase.contains(Stem)){
                if (Bases[1].contains(Stem) && Stem.length() > 1){
                    // regular case: greater -> great
                    FinalList.add(Stem);
                } else if (Len > SuffixLen + 2
                        && CurString.charAt(Pivot) == CurString.charAt(Pivot-1)
                        && !WordBase.contains(ShortStem)
                        && Bases[1].contains(ShortStem)){
                    // doubled consonant: bigger -> big
                    FinalList.add(ShortStem);
                } else if (CurString.charAt(Pivot) == 'i'
                        && (CleanArray.contains(ShortStem+"y") || Bases[1].contains(ShortStem+"y"))){
                    // easier -> easy
                    FinalList.add(ShortStem+"y");
                } else {
                    FinalList.add(CurString);
                }
            }
        } else {
            FinalList.add(CurString);
        }
    }

    Collections.sort(FinalList);
    TotalCount = FinalList.size();
    out("Count with duplicates: "+ TotalCount);

    // The list is sorted, so equal words are adjacent: count each run and emit
    // one row when the run ends.
    int Count = 1;
    for (int Index = 0; Index < FinalList.size(); Index++){
        String Word = FinalList.get(Index);
        boolean SameAsNext = Index < FinalList.size()-1
                && Word.equals(FinalList.get(Index + 1));
        if (SameAsNext){
            Count++;
        } else {
            Object[] Table = new Object[3];
            Table[0] = Word;
            Table[1] = Count;
            RatedFinalList.add(Table);
            Count = 1; // reset for the next word
        }
    }

    // Clean WordBase: replace stored derived forms with the base word found above.
    for (Object[] Row : RatedFinalList){
        String CurString = Row[0].toString();
        String LastChar = CurString.substring(CurString.length()-1);
        String Doubled = CurString + LastChar;                               // stop -> stopp
        String WithoutLast = CurString.substring(0, CurString.length()-1);   // make -> mak

        if (FilterEd && WordBase.contains(Doubled + "ed")){
            // stopped, tarred
            Replace(WordBase, Doubled + "ed", CurString);
        } else if (FilterEd && WordBase.contains(CurString+"d")){
            Replace(WordBase, CurString+"d", CurString);
        } else if (FilterS && WordBase.contains(CurString+"s")){
            Replace(WordBase, CurString+"s", CurString);
        } else if (FilterS && WordBase.contains(CurString+"es")){
            Replace(WordBase, CurString+"es", CurString);
        } else if (FilterLy && WordBase.contains(CurString+"ly")){
            Replace(WordBase, CurString+"ly", CurString);
        } else if (FilterIng
                && (WordBase.contains(CurString+"ing")
                    || WordBase.contains(WithoutLast + "ing")
                    || WordBase.contains(Doubled + "ing"))){
            // The parentheses matter: without them the F_ing switch guarded only
            // the first alternative and the other two ran even when it was off.
            String PlustIng = Doubled + "ing"; // getting
            out(PlustIng);
            if (CurString.endsWith("e")){
                Replace(WordBase, WithoutLast+"ing", CurString);
            } else if (WordBase.contains(PlustIng)){
                Replace(WordBase, PlustIng, CurString);
            } else {
                Replace(WordBase, CurString+"ing", CurString);
            }
        } else if (FilterLy && WordBase.contains(CurString+"ally")){
            Replace(WordBase, CurString+"ally", CurString);
        } else if (FilterEd && WordBase.contains(CurString+"ed")){
            // NOTE(review): this skips the replacement only when CurString+"e" is
            // in BOTH CleanArray and Bases[1]; "&&" may have been intended
            // (words like compare/compared) - confirm before changing.
            if (!CleanArray.contains(CurString+"e") || !Bases[1].contains(CurString+"e")){
                Replace(WordBase, CurString+"ed", CurString);
            }
        }
    }

    out("Count after filters: "+RatedFinalList.size());
    return RatedFinalList;
}