/* Copyright 2004-2011 by the National and Technical University of Athens This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . */ package org.ivml.alimo; /** * @author Giorgos Stoilos (gstoil@image.ece.ntua.gr) * * This class implements the string matching method proposed in the paper * "A String Metric For Ontology Alignment", published in ISWC 2005 * */ public class I_Sub { /** * @param s1 input string 1 * @param s2 input string 2 * @param normaliseStrings a boolean value that specifies whether the two strings are to be normalised by * a custom normalisation algorithm that basically removes punctuation symbols and converts both input strings * to lower-case. Note that without normalisation the method is case sensitive. * * @return degree of similarity between the two strings. */ public double score(String s1, String s2, boolean normaliseStrings) { if( (s1 == null) || (s2 == null) ) return -1; String inputStr1 = s1; String inputStr2 = s2; if( normaliseStrings ){ s1 = s1.toLowerCase(); s2 = s2.toLowerCase(); s1 = normalizeString( s1 , '.' ); s2 = normalizeString( s2 , '.' ); s1 = normalizeString( s1 , '_' ); s2 = normalizeString( s2 , '_' ); s1 = normalizeString( s1 , ' ' ); s2 = normalizeString( s2 , ' ' ); } int l1 = s1.length(); // length of s int l2 = s2.length(); // length of t int L1 = l1; int L2 = l2; if ((L1 == 0) && (L2 == 0)) return 1; if ((L1 == 0) || (L2 == 0)) return -1; double common = 0; int best = 2; while( s1.length() > 0 && s2.length() > 0 && best != 0 ) { best = 0; // the best subs length so far l1 = s1.length(); // length of s l2 = s2.length(); // length of t int i = 0; // iterates through s1 int j = 0; // iterates through s2 int startS2 = 0; int endS2 = 0; int startS1 = 0; int endS1 = 0; int p=0; for( i = 0; (i < l1) && (l1 - i > best); i++) { j = 0; while (l2 - j > best) { int k = i; for(;(j < l2) && (s1.charAt(k) != s2.charAt(j)); j++); if (j != l2) { // we have found a starting point p = j; for (j++, k++; (j < l2) && (k < l1) && (s1.charAt(k) == s2.charAt(j)); j++, k++); if( k-i > best){ best = k-i; startS1 = i; endS1 = k; startS2 = p; endS2 = j; } } } } char[] newString = new char[ s1.length() - (endS1 - startS1) ]; j=0; for( i=0 ;i=startS1 && i< endS1 ) continue; newString[j++] = s1.charAt( i ); } s1 = new String( newString ); newString = new char[ s2.length() - ( endS2 - startS2 ) ]; j=0; for( i=0 ;i=startS2 && i< endS2 ) continue; newString[j++] = s2.charAt( i ); } s2 = new String( newString ); if( best > 2 ) common += best; else best = 0; } double commonality = 0; double scaledCommon = (double)(2*common)/(L1+L2); commonality = scaledCommon; double winklerImprovement = winklerImprovement(inputStr1, inputStr2, commonality); double dissimilarity = 0; double rest1 = L1 - common; double rest2 = L2 - common; double unmatchedS1 = Math.max( rest1 , 0 ); double unmatchedS2 = Math.max( rest2 , 0 ); unmatchedS1 = rest1/L1; unmatchedS2 = rest2/L2; /** Hamacher Product */ double suma = unmatchedS1 + unmatchedS2; double product = unmatchedS1 * unmatchedS2; double p = 0.6; //For 1 it coincides with the algebraic product if( (suma-product) == 0 ) dissimilarity = 0; else dissimilarity = (product)/(p+(1-p)*(suma-product)); return commonality - dissimilarity + winklerImprovement; } private double winklerImprovement(String s1, String s2, double commonality) { int i; int n = Math.min( s1.length() , s2.length() ); for( i=0 ; i