Groups | Blog | Home
all groups > sql server programming > june 2004 >

sql server programming : Replacing a string value across all char, varchar, text, nchar, nvarchar, ntext fields in all user tables



Bri Gipson
6/25/2004 10:43:13 PM
I've been slammed before for posting so called "solutions" on newsgroups, but it was all for the best since it taught me a few lessons. This is being posted for a couple of reasons. The first one is that I didn't find any other posts that had a solution
to this problem. The other is that I figure someone out there will have improvements on it and I'd like to learn them. The process is rather ugly, involving cursors, temp tables, direct calls to the system tables (instead of the recommended stored
procedure method) but it seems to be working. Hopefully this thread will be helpful to others in the future.

------------------------------------------------------------------------------------------------------------------------
-- How to replace a string value across all char, varchar, text, nchar, nvarchar and ntext fields in all user tables. --
------------------------------------------------------------------------------------------------------------------------

/* ============================================================ */
/* SP: sp_textreplace */
/* ============================================================ */
/* If procedure already exists...delete it */
IF EXISTS (SELECT 1 FROM sysobjects
WHERE name = 'sp_textreplace'
AND type = 'P')

BEGIN
DROP PROC sp_textreplace
END

GO

/**********************************************************/

CREATE PROCEDURE sp_textreplace

/*** Proc: sp_textreplace *********************************/
/* Description: */
/* A procedure that does replacement of text in a field */
/* defined by the user and of a TEXT data type. */
/* */
/* The SQL REPLACE fn doesn't work on TEXT data types, so */
/* we have to break the TEXT field into multiple VARCHARs */
/* and do REPLACE on those, then place them back into the */
/* field value through a text pointer. */
/* */
/* It is strongly advised that this script only executes */
/* during a time when nobody will be accessing the data. */
/* */
/* Input: table_name, text_field_name, record_id, */
/* find_text, replace_text */
/* */
/* Developer Task Date */
/* ------------------------------- ---- -------- */
/* Bri Gipson 06-22-2004 */
/* Created */
/* */
/* Bri Gipson 06-25-2004 */
/* Genericised to handle any table/field instead of */
/* just the table and field of a hard-coded object. */
/* (previously meta_data_glob.glob) */
/**********************************************************/

/* Input Values */
@table_name VARCHAR(256),
@field_name VARCHAR(256),
@record_id NUMERIC,
@find VARCHAR(256),
@replace VARCHAR(256)

/* Output Values */
-- None

AS
BEGIN

/* local variables */
DECLARE @ptr BINARY(16)
DECLARE @str VARCHAR(8000)
DECLARE @len BIGINT
DECLARE @del INT
DECLARE @pos INT
DECLARE @current_id NUMERIC
DECLARE @first_id NUMERIC
DECLARE @last_id NUMERIC
DECLARE @populate VARCHAR(8000)
DECLARE @update VARCHAR(8000)

CREATE TABLE #glob ( glob_id NUMERIC, glob_ptr BINARY(16), glob_length BIGINT, glob TEXT )

IF ( @record_id = 0 )
SET @populate = 'INSERT INTO #glob ( glob_id, glob_ptr, glob_length, glob ) SELECT ' + @table_name + '_id, tptr = TEXTPTR( ' + @field_name + ' ), dlength = DATALENGTH( ' + @field_name + ' ), tfield = ' + @field_name + ' FROM ' + @table_name
ELSE
SET @populate = 'INSERT INTO #glob ( glob_id, glob_ptr, glob_length, glob ) SELECT ' + @table_name + '_id, tptr = TEXTPTR( ' + @field_name + ' ), dlength = DATALENGTH( ' + @field_name + ' ), tfield = ' + @field_name + ' FROM ' + @table_name + ' WHERE
' + @table_name + '_id = ' + CONVERT( VARCHAR(10), @record_id )

EXEC ( @populate )

DECLARE glob_cursor CURSOR FOR
SELECT glob_id, glob_ptr, glob_length
FROM #glob

OPEN glob_cursor
FETCH NEXT FROM glob_cursor INTO @current_id, @ptr, @len

WHILE @@FETCH_STATUS = 0
BEGIN
SET @pos = 1

WHILE @pos < @len
BEGIN
-- Do our replacement:

-- Grab our substring of up to 4000 bytes.
-- We do only 4000 to leave room for replacement
-- that occurrs later on. Since REPLACE can only
-- return 8000 bytes, we need to greatly shrink our
-- original string to occomodate potential expansion
-- of the text.
SELECT
@str = REPLACE( SUBSTRING( glob, @pos, 4000 ), @find, @replace )
FROM
#glob
WHERE
glob_id = @current_id

-- Define values for the variables we send to UPDATETEXT
SELECT @pos = @pos - 1, @del = DATALENGTH( @str )

-- UPDATE our GLOB with the new replaced string
SET @str = REPLACE( @str, '''', '''''' )
SET @update = 'DECLARE @ptr BINARY(16) SET @ptr = CONVERT( BINARY(16), CONVERT( UNIQUEIDENTIFIER, ''' + CONVERT( VARCHAR(255), CONVERT( UNIQUEIDENTIFIER, @ptr ) ) + ''' ) ) ' +
'UPDATETEXT ' + @table_name + '.' + @field_name + ' ' +
'@ptr ' +
CONVERT( VARCHAR(255), @pos ) + ' ' +
CONVERT( VARCHAR(256), @del ) + ' '''
-- PRINT @update
EXECUTE ( @update + @str + '''' )

-- UPDATETEXT
-- meta_data_glob.glob @ptr
-- @pos
-- @del
-- @str

-- If we had a full plate of 4000 bytes, we move forward 100 bytes less.
-- because we want some overlap in case we split a word in two from
-- our current replace cycle.
-- If we don't have a full plate, that means we're at the end.
SELECT @pos = CASE WHEN @del < 4000 THEN @len + 1 ELSE @pos + 5901 END
END

FETCH NEXT FROM glob_cursor INTO @current_id, @ptr, @len
END

CLOSE glob_cursor
DEALLOCATE glob_cursor
END

/* Use these commands to test FROM the command line: */
/* EXECUTE sp_textreplace 'meta_data_glob', 'glob', 0, 'house', 'mouse' */
GO



/* ======================================================================= */
/* File name: Replace All Strings.sql */
/* DBMS name: Microsoft SQL Server 2000 */
/* Created on: 06-22-2004 */
/* Last Modified: 06-22-2004 */
/* Description: */
Aaron [SQL Server MVP]
6/26/2004 12:48:25 AM
I have a routine for handling replace() in TEXT columns also.
http://www.aspfaq.com/2445

--
http://www.aspfaq.com/
(Reverse address to reply.)

[quoted text, click to view]
Dan Guzman
6/27/2004 9:57:28 PM
[quoted text, click to view]

BTW, it's a good practice to avoid naming procs with the 'sp_' prefix.

--
Hope this helps.

Dan Guzman
SQL Server MVP

Aaron [SQL Server MVP]
6/30/2004 11:11:53 AM
http://groups.google.com/groups?q=sp+prefix+group%3A*sqlserver*&ie=UTF-8&hl=en

--
http://www.aspfaq.com/
(Reverse address to reply.)




[quoted text, click to view]

Aaron [SQL Server MVP]
6/30/2004 12:01:05 PM
Yes, you're right, I assumed it would be a word.

--
http://www.aspfaq.com/
(Reverse address to reply.)




[quoted text, click to view]

Bri Gipson
6/30/2004 2:56:56 PM
I haven't heard that one before. Why is that?

Thanks,
Bri

--

[quoted text, click to view]
Bri Gipson
6/30/2004 3:08:08 PM
Thanks!

I really like how your code is more optimized for run-time. It only replaces text when it's necessary.
I think I'll try to use your process. The main catch that I had to work through is that we have over 30
fields in our database that use the TEXT datatype, so to use this I would have to do similar look-up
table and EXECUTE ( string ) methods.

Also, I noticed in your code:

SELECT @txtPval = TEXTPTR(txt) FROM data WHERE id=@curID
SELECT @txtPidx = PATINDEX('%'+@badStr+'%', txt)
FROM data WHERE id=@curID

IF @txtPidx > 0
BEGIN
WHILE (SELECT PATINDEX('%'+@badStr+'%', txt)
FROM data WHERE id=@curID) > 0
BEGIN
SELECT @txtPidx = PATINDEX('%'+@badStr+'%', txt)-1
FROM data WHERE id=@curID

UPDATETEXT data.txt @txtPval @txtPidx @lenbadStr @goodStr
END
END

Why didn't you begin the WHILE loop on the outside of your structure such as:

SELECT @txtPval = TEXTPTR(txt) FROM data WHERE id=@curID
SELECT @txtPidx = PATINDEX('%'+@badStr+'%', txt)
FROM data WHERE id=@curID

WHILE @txtPidx > 0
BEGIN
SELECT @txtPidx = @txtPidx - 1
UPDATETEXT data.txt @txtPval @txtPidx @lenbadStr @goodStr

SELECT @txtPidx = PATINDEX('%'+@badStr+'%', txt)
FROM data WHERE id=@curID
END

I'm not sure if the nesting you used offers benefit or not which is why I'm asking.

Thanks,
Bri

---

[quoted text, click to view]
Bri Gipson
6/30/2004 3:25:56 PM
Aaron,

Not trying to sound trite or ungrateful but you used a LEN rather than a DATALENGTH to capture
then length of the "Bad String". This would cause problems if you wanted to replace something with
a trailing space.

BTW - thanks for your site. I had discovered it just last week and started going through all your
RSS feeds. Very nice!

Thanks,
Bri

---

[quoted text, click to view]
Bri Gipson
6/30/2004 3:31:32 PM
Aaron,

Thank you VERY much. Wish someone told me this years ago. Now there are literally over a hundred
user defined database specific (not system) stored procedures. I guess it would have been better
to go with gut instinct and use spu_ instead. I think BP Margolin said it best in one of his
posts:

The one naming convention that SQL Server does use that is STRONGLY not
recommended for developers is the "sp " prefix for stored procedures.
Stored procedures using the "sp " prefix are identified by SQL Server as
probably system stored procedures, and there are certain behaviors
associated with system stored procedures that are optimal for system
stored procedure but very sub-optimal for user defined stored
procedures.

Thanks,
Bri

---

[quoted text, click to view]
Bri Gipson
6/30/2004 4:28:15 PM
Thanks for all of your input.

Below is a revised version of the stored procedure. It uses spu_ instead of sp_, only does replacements on records that require them, and only replaces the text found rather than an entire text block. It's nice to get such valuable feedback, so please
advise where you see issues or improvements.

This assumes the schema is designed where each table that could have a TEXT data type would have some unique numeric identifier in a column with the naming convention <table_name>_id.

Is there a better way to handle converting a BINARY(16)? I couldn't use VARCHAR, INT or BIGINT since they either didn't convert correctly, or the byte size differed enough to truncate the binary value. The only data type that I could find to do this with
decency was the UNIQUEIDENTIFIER. Is there a better, or more generic datatype?

Thanks again for your comments,
Bri

--

/* ============================================================ */
/* SP: sp_textreplace */
/* ============================================================ */
/* If procedure already exists...delete it */
IF EXISTS (SELECT 1 FROM sysobjects
WHERE name = 'spu_textreplace'
AND type = 'P')

BEGIN
DROP PROC spu_textreplace
END

GO

/**********************************************************/

CREATE PROCEDURE spu_textreplace

/*** Proc: spu_textreplace ********************************/
/* Description: */
/* A procedure that does replacement of text in a field */
/* defined by the user and of a TEXT data type. */
/* */
/* The SQL REPLACE fn doesn't work on TEXT data types, so */
/* we have to break the TEXT field into multiple VARCHARs */
/* and do REPLACE on those, then place them back into the */
/* field value through a text pointer. */
/* */
/* It is strongly advised that this script only executes */
/* during a time when nobody will be accessing the data. */
/* */
/* Input: table_name, text_field_name, record_id, */
/* find_text, replace_text */
/* */
/* Developer Task Date */
/* ------------------------------- ---- -------- */
/* Bri Gipson 06-22-2004 */
/* Created */
/* */
/* Bri Gipson 06-25-2004 */
/* Genericised to handle any table/field instead of */
/* just the table and field of a hard-coded object. */
/* (previously meta_data_glob.glob) */
/* */
/* Bri Gipson 06-30-2004 */
/* Encorporated Aaron Bertrand's optimizations to only */
/* commit an UPDATETEXT when necessary and to only do */
/* the update for the specified replacement, not an */
/* entire chunk of text. */
/* http://www.aspfaq.com/show.asp?id=2445 */
/**********************************************************/

/* Input Values */
@table_name VARCHAR(256),
@field_name VARCHAR(256),
@record_id NUMERIC,
@find VARCHAR(256),
@replace VARCHAR(256)

/* Output Values */
-- None

AS
BEGIN

SET NOCOUNT ON
PRINT 'Replacing the text "' + @find + '" with "' + @replace + '" for ' + @table_name + '.' + @field_name

/* local variables */
DECLARE @ptr BINARY(16)
DECLARE @pos INT
DECLARE @current_id NUMERIC
DECLARE @populate VARCHAR(8000)
DECLARE @update VARCHAR(8000)
DECLARE @find_len INT
DECLARE @update_txt_pidx VARCHAR(8000)

SELECT @find_len = DATALENGTH( @find )

CREATE TABLE #glob ( glob_id NUMERIC, glob_ptr BINARY(16), txt_pidx INT )

IF ( @record_id = 0 )
SET @populate = 'INSERT INTO #glob ( glob_id, glob_ptr, txt_pidx ) SELECT ' + @table_name + '_id, tptr = TEXTPTR( ' + @field_name + ' ), PATINDEX(''%' + @find + '%'', ' + @field_name + ' ) FROM ' + @table_name
ELSE
SET @populate = 'INSERT INTO #glob ( glob_id, glob_ptr, txt_pidx ) SELECT ' + @table_name + '_id, tptr = TEXTPTR( ' + @field_name + ' ), PATINDEX(''%' + @find + '%'', ' + @field_name + ' ) FROM ' + @table_name + ' WHERE ' + @table_name + '_id = ' +
CONVERT( VARCHAR(10), @record_id )

EXEC ( @populate )

DECLARE glob_cursor INSENSITIVE CURSOR FOR
SELECT glob_id, glob_ptr
FROM #glob
WHERE txt_pidx > 0

OPEN glob_cursor
FETCH NEXT FROM glob_cursor INTO @current_id, @ptr

WHILE @@FETCH_STATUS = 0
BEGIN

SELECT
@pos = txt_pidx
FROM
#glob
WHERE
#glob.glob_id = @current_id

WHILE @pos > 0
BEGIN
PRINT 'Found "' + @find + '" in row id ' + CAST( @current_id AS VARCHAR(10) ) + ' at position ' + CAST( @pos AS VARCHAR(10) )

-- Do our replacement:
SELECT @pos = @pos - 1

SET @update =
'DECLARE @ptr BINARY(16) ' +
'SET @ptr = CONVERT( BINARY(16), CONVERT( UNIQUEIDENTIFIER, ''' + CONVERT( VARCHAR(255), CONVERT( UNIQUEIDENTIFIER, @ptr ) ) + ''' ) ) ' +
'UPDATETEXT ' + @table_name + '.' + @field_name + ' ' +
'@ptr ' +
CONVERT( VARCHAR(255), @pos ) + ' ' +
CONVERT( VARCHAR(256), @find_len ) + ' ' +
'''' + @replace + ''''
-- PRINT @update + ''''
EXECUTE ( @update )

SET @update_txt_pidx =
'UPDATE ' +
'#glob ' +
'SET ' +
'txt_pidx = PATINDEX(''%' + @find + '%'', ' + @table_name + '.' + @field_name + ' ) ' +
'FROM ' +
@table_name + ' ' +
'WHERE ' +
@table_name + '_id = ' + CAST( @current_id AS VARCHAR(10) ) + ' AND ' +
'glob_id = ' + CAST( @current_id AS VARCHAR(10) )
-- PRINT @update_txt_pidx + ''''
EXECUTE ( @update_txt_pidx )

SELECT
@pos = txt_pidx
FROM
#glob
WHERE
#glob.glob_id = @current_id

END

FETCH NEXT FROM glob_cursor INTO @current_id, @ptr
END

CLOSE glob_cursor
DEALLOCATE glob_cursor

SET NOCOUNT OFF

END

/* Use these commands to test FROM the command line: */
/* EXECUTE spu_textreplace 'meta_data_glob', 'glob', 0, 'house', 'mouse' */
GO
AddThis Social Bookmark Button