Comments (9)
Hi! Thanks for stopping by!
I don't think it's an over-complication, but like I said in the post "I found the numbers table approach faster. Granted, it’s also less portable, but that’s a trade-off I’m willing to make".
IIRC the numbers table approach was 3-4 seconds faster in the example queries. I suppose it depends on what you're doing with them, though.
from darlingdata.
I found the results of the following test script very surprising: the second version is both more performant and more portable!
SET NOCOUNT ON;
---------------------------------------------------------------------------------------------------------
-- mmcapps88 cCTE
-- Extracts the digits from @string with an inline cascading-CTE tally whose ten seed rows come from a
-- UNION ALL chain. STATISTICS IO/TIME are switched on only around the measured query.
---------------------------------------------------------------------------------------------------------
PRINT '---------------------------------------------------------------------------------------------------------
mmcapps88 cCTE
---------------------------------------------------------------------------------------------------------';
DECLARE @string NVARCHAR(4000) = N'rfge535drgsgh786rfvsfasf';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
WITH
    -- Ten seed rows, one per branch of the UNION ALL chain.
    E1 (N) AS
    (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
        UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    ),
    -- 10 x 10 = 100 rows.
    E2 (N) AS
    (
        SELECT 1
        FROM E1 AS lhs
        CROSS JOIN E1 AS rhs
    ),
    -- 100 x 100 = 10,000 rows at most.
    E4 (N) AS
    (
        SELECT 1
        FROM E2 AS lhs
        CROSS JOIN E2 AS rhs
    ),
    -- A zero row followed by a running number, capped up front so the cross joins are never fully expanded.
    -- NOTE: DATALENGTH counts bytes, so for NVARCHAR this cap exceeds the character count;
    -- the positions CTE below trims the set to LEN(@string) rows.
    cteTally (N) AS
    (
        SELECT 0
        UNION ALL
        SELECT TOP (DATALENGTH(ISNULL(@string, 1)))
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM E4
    ),
    -- Re-number the tally rows so there is exactly one row per character position (1..LEN(@string)).
    positions (pos) AS
    (
        SELECT TOP (LEN(@string))
            ROW_NUMBER() OVER (ORDER BY tally.N)
        FROM cteTally AS tally
    )
SELECT
    CONVERT(NVARCHAR(4000),
        (
            -- The inner column is deliberately left unnamed so FOR XML PATH('') emits bare text
            -- (no element tags) and the matching characters are simply concatenated in position order.
            SELECT SUBSTRING(@string COLLATE Latin1_General_100_BIN2, p.pos, 1)
            FROM positions AS p
            WHERE SUBSTRING(@string COLLATE Latin1_General_100_BIN2, p.pos, 1) LIKE '[0-9]'
            ORDER BY p.pos
            FOR XML PATH('')
        )
    ) AS numbers_only;
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
GO
---------------------------------------------------------------------------------------------------------
-- Itzik Ben-gan cCTE max 10,000
-- Same digit extraction as the previous test, but the ten seed rows come from a VALUES constructor and
-- the character positions are numbered straight off the 10,000-row CTE.
---------------------------------------------------------------------------------------------------------
PRINT '---------------------------------------------------------------------------------------------------------
Itzik Ben-gan cCTE
---------------------------------------------------------------------------------------------------------';
DECLARE @string NVARCHAR(4000) = N'rfge535drgsgh786rfvsfasf';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
WITH
    -- Ten seed rows from a table value constructor.
    E1 (N) AS
    (
        SELECT 1
        FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS seed (N)
    ),
    -- 10 x 10 = 100 rows.
    E2 (N) AS
    (
        SELECT 1
        FROM E1 AS lhs
        CROSS JOIN E1 AS rhs
    ),
    -- 100 x 100 = 10,000 rows.
    E4 (N) AS
    (
        SELECT 1
        FROM E2 AS lhs
        CROSS JOIN E2 AS rhs
    ),
    -- One row per character position (1..LEN(@string)).
    positions (pos) AS
    (
        SELECT TOP (LEN(@string))
            ROW_NUMBER() OVER (ORDER BY src.N)
        FROM E4 AS src
    )
SELECT
    CONVERT(NVARCHAR(4000),
        (
            -- The inner column is deliberately left unnamed so FOR XML PATH('') emits bare text
            -- (no element tags) and the matching characters are simply concatenated in position order.
            SELECT SUBSTRING(@string COLLATE Latin1_General_100_BIN2, p.pos, 1)
            FROM positions AS p
            WHERE SUBSTRING(@string COLLATE Latin1_General_100_BIN2, p.pos, 1) LIKE '[0-9]'
            ORDER BY p.pos
            FOR XML PATH('')
        )
    ) AS numbers_only;
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
GO
---------------------------------------------------------------------------------------------------------
-- fnTally from Jeff Moden
-- Same digit extraction as the other tests, with the character positions supplied by dbo.fnTally.
-- The function must already exist; its definition is kept below (commented out) for reference.
-- Called with @ZeroOrOne = 1 it returns N = 1..@MaxN.
---------------------------------------------------------------------------------------------------------
PRINT '---------------------------------------------------------------------------------------------------------
fnTally from Jeff Moden
---------------------------------------------------------------------------------------------------------';
/*
CREATE FUNCTION [dbo].[fnTally]
(@ZeroOrOne BIT, @MaxN BIGINT)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN WITH
H2(N) AS ( SELECT 1
FROM (VALUES
(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
)V(N)) --16^2 or 256 rows
, H4(N) AS (SELECT 1 FROM H2 a, H2 b) --16^4 or 65,536 rows
, H8(N) AS (SELECT 1 FROM H4 a, H4 b) --16^8 or 4,294,967,296 rows
SELECT N = 0 WHERE @ZeroOrOne = 0 UNION ALL
SELECT TOP(@MaxN)
N = ROW_NUMBER() OVER (ORDER BY N)
FROM H8
;
GO
*/
DECLARE @string NVARCHAR(4000) = N'rfge535drgsgh786rfvsfasf';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT
    CONVERT(NVARCHAR(4000),
        (
            -- The inner column is deliberately left unnamed so FOR XML PATH('') emits bare text
            -- (no element tags) and the matching characters are simply concatenated in position order.
            SELECT SUBSTRING(@string COLLATE Latin1_General_100_BIN2, x.N, 1)
            -- Ask only for as many numbers as @string has characters, like the other tests do with
            -- TOP (LEN(@string)); a fixed 4000 made this test probe positions past the end of the
            -- string. ISNULL keeps a NULL @string returning NULL rather than passing NULL to TOP.
            FROM dbo.fnTally(1, ISNULL(LEN(@string), 0)) AS x
            WHERE SUBSTRING(@string COLLATE Latin1_General_100_BIN2, x.N, 1) LIKE '[0-9]'
            ORDER BY x.N
            FOR XML PATH('')
        )
    ) AS numbers_only;
-- Switch the statistics off inside the same batch that switched them on, matching the earlier tests.
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
GO
---------------------------------------------------------------------------------------------------------
-- Permanent Table dbo.Tally, contains 0 - 10,000
-- Same digit extraction as the other tests, with the character positions numbered off a permanent table.
-- One-time setup is kept below (commented out); it relies on dbo.fnTally to populate the table.
---------------------------------------------------------------------------------------------------------
--CREATE TABLE dbo.Tally
--(
-- N INT NOT NULL PRIMARY KEY
--)
--INSERT tally(N)
--SELECT N FROM dbo.fnTally(0,10000) ft
PRINT '---------------------------------------------------------------------------------------------------------
Permanent Table dbo.Tally
---------------------------------------------------------------------------------------------------------';
DECLARE @string NVARCHAR(4000) = N'rfge535drgsgh786rfvsfasf';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
WITH
    -- One row per character position: the first LEN(@string) table rows, re-numbered from 1
    -- (the table itself starts at 0).
    x AS
    (
        SELECT TOP (LEN(@string))
            ROW_NUMBER() OVER (ORDER BY n.N) AS x
        FROM dbo.Tally AS n
    )
SELECT
    CONVERT(NVARCHAR(4000),
        (
            -- The inner column is deliberately left unnamed so FOR XML PATH('') emits bare text
            -- (no element tags) and the matching characters are simply concatenated in position order.
            SELECT SUBSTRING(@string COLLATE Latin1_General_100_BIN2, x.x, 1)
            FROM x AS x
            WHERE SUBSTRING(@string COLLATE Latin1_General_100_BIN2, x.x, 1) LIKE '[0-9]'
            ORDER BY x.x
            FOR XML PATH('')
        )
    ) AS numbers_only;
-- Switch the statistics off inside the same batch that switched them on, matching the earlier tests.
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
GO
from darlingdata.
@Glossopian I see the creation scripts but no details about the tests you ran.
from darlingdata.
@erikdarlingdata I stripped out the create function statements and simply ran the same test on the same sample string
DECLARE @string NVARCHAR(4000) = N'rfge535drgsgh786rfvsfasf';
The only difference is the source of the numbers: a cCTE, Jeff's function, or a permanent table. Not an extensive test, I know, but when you view the execution plans, the difference between them is, as I said, surprising!
HTH
from darlingdata.
@Glossopian Gotcha, yeah, I'm not interested in running these against a single short string. If you refer back to my post, the usage pattern is doing this against an entire table. Thanks though!
from darlingdata.
@erikdarlingdata Missed that ;-). I came here directly from a link sent out by Brent Ozar, and my first thought was "could we use a cCTE instead of a permanent table?" I then saw the OP had raised this too and did a quick (and dirty) test. I have since refactored the cCTE into a function and run it against a million-row test table. And you are quite right: a permanent numbers table wins hands down in terms of cost. Interestingly, they both took around the same time, ~30 secs on my setup, but cost is key.
Thanks for responding, I'm always happy to learn.
from darlingdata.
@Glossopian yeah, it's always a lot of fun! If you show me how you tested I'll see if I can repro. Thanks!
from darlingdata.
It uses another Jeff Moden masterpiece JBMTest:
--In case someone is interested, here's my "standard" million row test table example...
/**********************************************************************************************************************
Purpose:
Build a large test table (dbo.JBMTest) filled with several kinds of highly randomized data.
--Jeff Moden
**********************************************************************************************************************/
--===== Drop any earlier copy of the test table so the script can simply be rerun
IF OBJECT_ID('dbo.JBMTest', 'U') IS NOT NULL
BEGIN
    DROP TABLE dbo.JBMTest;
END;
--===== Build and fill the 1,000,000 row test table in a single SELECT ... INTO.
-- The cross join of sys.all_columns with itself is only a row source; TOP caps it at one million rows.
-- Column        Contents
-- SomeID        1 to 1,000,000, unique
-- SomeInt       1 to 50,000, non-unique
-- SomeLetters2  "AA" to "ZZ", non-unique 2 character strings
-- SomeMoney     0.00 to 100.00, non-unique
-- SomeDateTime  >= 01/01/2000 and < 01/01/2020, non-unique date/times
-- SomeDate      >= 01/01/2000 and < 01/01/2020, non-unique "whole dates"
-- SomeName      random characters, random length from 2 to 20
SELECT TOP (1000000)
    IDENTITY(INT, 1, 1) AS SomeID,
    ABS(CHECKSUM(NEWID())) % 50000 + 1 AS SomeInt,
    CHAR(ABS(CHECKSUM(NEWID())) % (ASCII('Z') - ASCII('A') + 1) + ASCII('A'))
        + CHAR(ABS(CHECKSUM(NEWID())) % (ASCII('Z') - ASCII('A') + 1) + ASCII('A')) AS SomeLetters2,
    CAST(RAND(CHECKSUM(NEWID())) * 100 AS DECIMAL(9, 2)) AS SomeMoney, --Note rounding
    RAND(CHECKSUM(NEWID())) * DATEDIFF(dd, '2000', '2020') + CAST('2000' AS DATETIME) AS SomeDateTime,
    ABS(CHECKSUM(NEWID())) % DATEDIFF(dd, '2000', '2020') + CAST('2000' AS DATETIME) AS SomeDate,
    RIGHT(NEWID(), ABS(CHECKSUM(NEWID())) % 19 + 2) AS SomeName
INTO dbo.JBMTest
FROM sys.all_columns AS ac1
CROSS JOIN sys.all_columns AS ac2;
--===== Add the clustered primary key once the table has been loaded
ALTER TABLE dbo.JBMTest
    ADD CONSTRAINT PK_JBMTest
    PRIMARY KEY CLUSTERED (SomeID)
    WITH (FILLFACTOR = 90);
--===== Show the first 1 percent of the rows as a quick sanity check
SELECT TOP (1) PERCENT
    jt.SomeID,
    jt.SomeInt,
    jt.SomeLetters2,
    jt.SomeMoney,
    jt.SomeDateTime,
    jt.SomeDate,
    jt.SomeName
FROM dbo.JBMTest AS jt
ORDER BY jt.SomeID;
GO
Here's the function I created (the letters one, adapted from your function; the only change should be that the numbers table is replaced with the cCTE):
-- dbo.get_letters2: inline table-valued function that strips everything except letters from @string.
--   @string  NVARCHAR(4000)  the text to filter.
-- Returns a single row with one column, letters_only (NVARCHAR(4000)): the characters of @string that
-- match [a-zA-Z] under a binary collation, in their original order. letters_only is NULL when nothing
-- matches or when @string is NULL.
-- The character positions come from an inline cascading CTE (up to 10,000 rows), not a numbers table.
ALTER FUNCTION dbo.get_letters2(@string NVARCHAR(4000))
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
WITH E1 (N)
AS (SELECT 1
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS DT (N)) --10 rows
, E2 (N)
AS (SELECT 1 FROM E1 AS A CROSS JOIN E1 AS B) --100 rows
, E4 (N)
AS (SELECT 1 FROM E2 AS A CROSS JOIN E2 AS B) --10,000 rows
, x
-- One row per character position. LEN(NULL) is NULL and TOP (NULL) fails at run time, so a NULL
-- @string is mapped to zero positions; the function then returns letters_only = NULL.
AS (SELECT TOP (ISNULL(LEN(@string), 0))
        ROW_NUMBER() OVER (ORDER BY n.N) AS x
    FROM E4 AS n)
SELECT CONVERT(NVARCHAR(4000)
    -- The inner column is deliberately left unnamed so FOR XML PATH('') emits bare text (no element
    -- tags) and the matching characters are simply concatenated in position order.
    , ( SELECT SUBSTRING(@string COLLATE Latin1_General_100_BIN2, x.x, 1)
        FROM x AS x
        WHERE SUBSTRING(@string COLLATE Latin1_General_100_BIN2, x.x, 1) LIKE '[a-zA-Z]'
        ORDER BY x.x
        FOR XML PATH(''))) AS letters_only;
And here is the test
-- Run both functions over every row of dbo.JBMTest so their plans and timings can be compared.
-- The argument is written as jt.SomeName, matching the column's declared case, so the queries also
-- resolve in a database with a case-sensitive collation.
SELECT
    jt.SomeName,
    gn.letters_only
FROM dbo.JBMTest AS jt
CROSS APPLY dbo.get_letters(jt.SomeName) AS gn; --your function
SELECT
    jt.SomeName,
    gn.letters_only
FROM dbo.JBMTest AS jt
CROSS APPLY dbo.get_letters2(jt.SomeName) AS gn; --my cCTE version
from darlingdata.
Very cool! Yeah, that performs about as well using the test from the blog post too. Nice work!
from darlingdata.
Related Issues (20)
- sp_QuickieStore not returning expected runtime_stats records HOT 6
- sp_QuickieStore unexpected default @end_date HOT 4
- sp_QuickieStore - Count of executions across all plan ids? HOT 2
- sp_PressureDetector: configurable CPU percent details
- sp_HumanEventsBlockViewer: Add object schema to contentious object
- Given @days_back is always negative then this would always return @days_back HOT 1
- sp_HumanEventsBlockViewer - check if databases already have RCSI enabled in sys.databases
- sp_HumanEvents: logging the one-off runs to table HOT 5
- sp_LogHunter: Add search for CAL warning
- Log_Hunter additional parameters for not delete record HOT 2
- sp_QuickieStore - Add a workday mode HOT 1
- Workdays flag causes error when @get_all_databases = 1 HOT 4
- Add a “recent” sort to QuickieStore
- All Scripts: Fix Date Math Functions To Not Rely On 0 HOT 1
- Error in the help text of sp_HumanEventsBlockViewer HOT 3
- sp_QuickieStore - do some more status checking early
- sp_QuickieStore - Add the ability to search for queries with square brackets HOT 6
- sp_QuickieStore - New parameter that allow to only show queries with Query Hints (2022+) HOT 1
- sp_HealthParser - Pending Tasks threshold
- sp_QuickieStore - Add parameters to search for plan feedback and variants
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from darlingdata.