With a little side of applesauce...

Wednesday, August 12, 2009

Coldfusion - work-around for slow searching on large groups

I found that CFLDAP was pretty slow when searching the "member" attribute of large groupOfNames objects (i.e., groups with 4000+ members). Since we are using OpenLDAP 2.3, I can't search on memberOf instead, so I kept testing until I found what appears to be the cause.

CFLDAP uses JNDI, and BasicAttribute's get() function returns the values as an array. Since CFLDAP returns our search as a query object, it must iterate through the multi-value array returned by get(), and then add each value to the query. I was able to reproduce the slowdown by making the JNDI calls manually, so I bypassed this iteration by using the debugging toString() function and then removing the attribute name and the colon (plus the space after it) from the front of the line.

The "javaSearch" function below is a quickly written kludge to replace my calls to OpenLDAP using CFLDAP. It should duplicate what a CFLDAP query returns, both when attributes is set to "*" and when attributes are explicitly listed (e.g., cn, dn, mail). It has replaced our CFLDAP usage for large group searches, but it has not been tested extensively by any means. I hope that others can find it useful :)

    <cffunction name="javaSearch"
            returnType="query"
            access="public"
            output="no"
            hint="Perform an LDAP search through JNDI directly instead of CFLDAP.
@param serverAddr string required ldap server
@param port string required ldap server port (636 selects ldaps, anything else ldap)
@param username string required bind user
@param password string required bind password
@param basedn string required basedn from which we search
@param attributes string comma separated attributes to return (default *)
@param filter string LDAP filter for the search (default * which is sent as (objectClass=*))
@param scope string type of search OBJECT_SCOPE,ONELEVEL_SCOPE,SUBTREE_SCOPE (default SUBTREE_SCOPE)
@param separator string accepted for CFLDAP compatibility but not applied - multi-valued attributes come back in the comma separated form produced by the JNDI attribute toString()

@return query LDAP search results. Errors are rethrown to the caller after the connection is closed.">

        <cfargument name="serverAddr" type="string" required="yes">
        <cfargument name="port" type="string" required="yes">
        <cfargument name="username" type="string" required="yes">
        <cfargument name="password" type="string" required="yes">
        <cfargument name="basedn" type="string" required="yes">
        <cfargument name="attributes" type="string" default="*">
        <cfargument name="filter" type="string" default="*">
        <cfargument name="scope" type="string" default="SUBTREE_SCOPE">
        <cfargument name="separator" type="string" default="|">

        <!--- every local lives in this var-scoped struct so nothing leaks into the shared variables scope --->
        <cfset var loc = StructNew()>

        <!--- class handles for the JNDI constants; created here so the function does not rely on page-level variables --->
        <cfset loc.Context = CreateObject( "java", "javax.naming.Context" )>
        <cfset loc.SearchControls = CreateObject( "java", "javax.naming.directory.SearchControls" )>

        <!--- port 636 means LDAP over SSL --->
        <cfset loc.protocol = "ldap">
        <cfif arguments.port EQ "636">
            <cfset loc.protocol = "ldaps">
        </cfif>

        <!--- connection environment for a simple (username/password) bind --->
        <cfset loc.env = CreateObject( "java", "java.util.Hashtable" ).init()>
        <cfset loc.env.put( loc.Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory" )>
        <cfset loc.env.put( loc.Context.PROVIDER_URL, "#loc.protocol#://#arguments.serverAddr#:#arguments.port#/" )>
        <cfset loc.env.put( loc.Context.SECURITY_AUTHENTICATION, "simple" )>
        <cfset loc.env.put( loc.Context.SECURITY_PRINCIPAL, "#arguments.username#" )>
        <cfset loc.env.put( loc.Context.SECURITY_CREDENTIALS, "#arguments.password#" )>

        <!--- strip spaces so 'cn, dn, mail' becomes 'cn,dn,mail'; an empty list is treated like * --->
        <cfset loc.attributeList = Replace( arguments.attributes, " ", "", "ALL" )>
        <cfif NOT Len( loc.attributeList )>
            <cfset loc.attributeList = "*">
        </cfif>

        <!--- a bare * is not a valid LDAP filter string, so send the match-everything filter instead --->
        <cfset loc.filter = arguments.filter>
        <cfif Trim( loc.filter ) EQ "*">
            <cfset loc.filter = "(objectClass=*)">
        </cfif>

        <!--- honour the requested scope; unrecognised values keep the previous subtree behaviour --->
        <cfswitch expression="#UCase( Trim( arguments.scope ) )#">
            <cfcase value="OBJECT_SCOPE">
                <cfset loc.searchScope = loc.SearchControls.OBJECT_SCOPE>
            </cfcase>
            <cfcase value="ONELEVEL_SCOPE">
                <cfset loc.searchScope = loc.SearchControls.ONELEVEL_SCOPE>
            </cfcase>
            <cfdefaultcase>
                <cfset loc.searchScope = loc.SearchControls.SUBTREE_SCOPE>
            </cfdefaultcase>
        </cfswitch>

        <cftry>
            <!--- loc.ctx is of type DirContext --->
            <cfset loc.ctx = CreateObject( "java", "javax.naming.directory.InitialDirContext" ).init( loc.env )>

            <cfset loc.ctls = loc.SearchControls.init()>
            <cfset loc.ctls.setSearchScope( JavaCast( "int", loc.searchScope ) )>

            <cfset loc.answer = loc.ctx.search( arguments.basedn, loc.filter, loc.ctls )>

            <!---
                if attributes is * we create one row per attribute:
                    NAME       VALUE
                    1 jpegPhoto  [empty string]

                if attributes is 'cn, dn, mail' we create one row per entry:
                    CN    DN                                    MAIL
                    1 mycn  cn=mycn,ou=people,dc=example,dc=com   myemail@email.com
            --->
            <cfif loc.attributeList EQ "*">
                <cfset loc.result = QueryNew( "NAME,VALUE" )>

                <cfloop condition="loc.answer.hasMore()">
                    <cfset loc.entry = loc.answer.next()>

                    <!--- getName() is relative to the search base; it is empty when the base entry itself matched --->
                    <cfset loc.entryDn = loc.entry.getName()>
                    <cfif Len( loc.entryDn )>
                        <cfset loc.entryDn = loc.entryDn & "," & arguments.basedn>
                    <cfelse>
                        <cfset loc.entryDn = arguments.basedn>
                    </cfif>

                    <cfset loc.attrEnum = loc.entry.getAttributes().getAll()>
                    <cfloop condition="loc.attrEnum.hasMore()">
                        <cfset loc.attr = loc.attrEnum.next()>
                        <cfset loc.attrName = loc.attr.getID()>

                        <cfset QueryAddRow( loc.result )>
                        <cfif loc.attrName EQ "dn">
                            <cfset QuerySetCell( loc.result, "NAME", "dn" )>
                            <cfset QuerySetCell( loc.result, "VALUE", loc.entryDn )>
                        <cfelse>
                            <cfset QuerySetCell( loc.result, "NAME", loc.attrName )>
                            <cfset QuerySetCell( loc.result, "VALUE", javaSearchAttributeText( loc.attr ) )>
                        </cfif>
                    </cfloop>
                </cfloop>
            <cfelse>
                <cfset loc.result = QueryNew( loc.attributeList )>

                <cfloop condition="loc.answer.hasMore()">
                    <cfset loc.entry = loc.answer.next()>
                    <cfset loc.entryAttributes = loc.entry.getAttributes()>

                    <cfset loc.entryDn = loc.entry.getName()>
                    <cfif Len( loc.entryDn )>
                        <cfset loc.entryDn = loc.entryDn & "," & arguments.basedn>
                    <cfelse>
                        <cfset loc.entryDn = arguments.basedn>
                    </cfif>

                    <cfset QueryAddRow( loc.result )>

                    <cfloop list="#loc.attributeList#" delimiters="," index="loc.attrName">
                        <cfif loc.attrName EQ "dn">
                            <cfset QuerySetCell( loc.result, "dn", loc.entryDn )>
                        <cfelse>
                            <!--- get() returns a Java null when the entry lacks the attribute; StructKeyExists is false for null values, and the cell is then left empty --->
                            <cfset StructDelete( loc, "attr" )>
                            <cfset loc.attr = loc.entryAttributes.get( loc.attrName )>
                            <cfif StructKeyExists( loc, "attr" )>
                                <cfset QuerySetCell( loc.result, loc.attrName, javaSearchAttributeText( loc.attr ) )>
                            </cfif>
                        </cfif>
                    </cfloop>
                </cfloop>
            </cfif>

            <cfcatch type="any">
                <!--- release the enumeration and the connection, then hand the real error to the caller --->
                <cfif StructKeyExists( loc, "answer" )>
                    <cftry>
                        <cfset loc.answer.close()>
                        <cfcatch type="any"></cfcatch>
                    </cftry>
                </cfif>
                <cfif StructKeyExists( loc, "ctx" )>
                    <cftry>
                        <cfset loc.ctx.close()>
                        <cfcatch type="any"></cfcatch>
                    </cftry>
                </cfif>
                <cfrethrow>
            </cfcatch>
        </cftry>

        <!--- success path: the enumeration is exhausted, so only the connection is left to close --->
        <cftry>
            <cfset loc.ctx.close()>
            <cfcatch type="any"></cfcatch>
        </cftry>

        <cfreturn loc.result />
    </cffunction>

    <cffunction name="javaSearchAttributeText"
            returnType="any"
            access="private"
            output="no"
            hint="Helper for javaSearch: turn one JNDI attribute into a single query cell value without iterating over its values.
@param attr any required JNDI attribute (javax.naming.directory.Attribute)

@return any empty string for no values, the value itself for one value, otherwise the toString() text with the leading name and colon removed">

        <cfargument name="attr" type="any" required="yes">

        <cfset var valueCount = arguments.attr.size()>
        <cfset var text = "">
        <cfset var prefix = "">

        <cfif valueCount EQ 0>
            <cfreturn "">
        </cfif>

        <cfif valueCount EQ 1>
            <cfreturn arguments.attr.get( 0 )>
        </cfif>

        <!--- toString() renders 'name: value1, value2, ...' in one call, which is the whole point of this work-around --->
        <cfset text = arguments.attr.toString()>
        <cfset prefix = arguments.attr.getID() & ": ">
        <!--- strip the prefix by length rather than by regex so attribute names are never read as a pattern --->
        <cfif Left( text, Len( prefix ) ) EQ prefix>
            <cfset text = RemoveChars( text, 1, Len( prefix ) )>
        </cfif>

        <cfreturn text>
    </cffunction>

No comments: