Forum Discussion
Removing duplicates and sorting data in columns instead of rows
- Jun 08, 2024
As variant bit more close to UI.
First query, SourceRaw, loads the file and declares data types (not sure which exactly locale shall be used)
let pFilepath = "C:\Test\My_Projects_&_Quotations_2024_06_05_22_36.csv", Source = Csv.Document( File.Contents(pFilepath), [Delimiter=",", Columns=15, Encoding=65001, QuoteStyle=QuoteStyle.None] ), PromoteHeaders = Table.PromoteHeaders(Source, [PromoteAllScalars=true]), DeclareType = Table.TransformColumnTypes( PromoteHeaders, { {"Gross Value", Currency.Type} , {"Date Created", type datetime} , {"Date Modified", type datetime} , {"Completion date", type date} }, "fr-LU") in DeclareTypeReferenced SourceRaw query, Projects, keeps columns with repeated values for each project and removes dupicates
let Source = SourceRaw, SelectColumns = Table.RemoveColumns(Source,{"Industry Name", "Industry", "Quoted to"}), RemoveDuplicates = Table.Distinct(SelectColumns) in RemoveDuplicatesQuery Industry pivots projects on them
let Source = SourceRaw, SelectColumns = Table.SelectColumns(Source,{"Project Name", "Industry", "Industry Name"}), ProjectIndustry = Table.Pivot( SelectColumns, List.Distinct(SelectColumns[Industry]), "Industry", "Industry Name", List.Min) in ProjectIndustryQuery QutedTo expands quotes to columns
let Source = SourceRaw, SelectColumns = Table.SelectColumns(Source,{"Project Name", "Quoted to"}), NoBlanks = Table.SelectRows(SelectColumns, each ([Quoted to] <> "")), RemoveDuplicates = Table.Distinct(NoBlanks), CountRecords = Table.Group( RemoveDuplicates, {"Project Name"}, {{"Count", each Table.RowCount(_), Int64.Type}}), QuoteCount = List.Max(CountRecords[Count]), names = List.Transform( {1..QuoteCount}, (q) => "Quote to #" & Text.From(q)), ListQutedTo = Table.Group( RemoveDuplicates, {"Project Name"}, {{"Quoted to", each [#"Quoted to"] }}), CombineQuotedTo = Table.TransformColumns( ListQutedTo, {"Quoted to", each Text.Combine(List.Transform(_, Text.From), "="), type text}), ExpandQuotedTo = Table.SplitColumn( CombineQuotedTo, "Quoted to", Splitter.SplitTextByDelimiter("=", QuoteStyle.Csv), names) in ExpandQuotedToFinally merge all togeteher
let MergeWithIndustry = Table.NestedJoin( Projects, {"Project Name"}, Industry, {"Project Name"}, "Industry", JoinKind.LeftOuter), IndustryNames = List.Skip( Table.ColumnNames(MergeWithIndustry{0}[Industry]) ), ExpandIndustry = Table.ExpandTableColumn( MergeWithIndustry, "Industry", IndustryNames), MergeQuotedTo = Table.NestedJoin( ExpandIndustry, {"Project Name"}, QuotedTo, {"Project Name"}, "QuotedTo", JoinKind.LeftOuter), QuotedToNames = List.Skip( Table.ColumnNames(MergeQuotedTo{0}[QuotedTo]) ), ExpandQuotedTo = Table.ExpandTableColumn( MergeQuotedTo, "QuotedTo", QuotedToNames) in ExpandQuotedTo
rachel Thank you so much for this but i dont really know how to apply these steps...could you please elaborate it more or share with me the file? Much appreciate it
As variant bit more close to UI.
First query, SourceRaw, loads the file and declares data types (not sure which exactly locale shall be used)
let
pFilepath = "C:\Test\My_Projects_&_Quotations_2024_06_05_22_36.csv",
Source = Csv.Document(
File.Contents(pFilepath),
[Delimiter=",", Columns=15, Encoding=65001, QuoteStyle=QuoteStyle.None]
),
PromoteHeaders = Table.PromoteHeaders(Source, [PromoteAllScalars=true]),
DeclareType = Table.TransformColumnTypes(
PromoteHeaders,
{
{"Gross Value", Currency.Type}
, {"Date Created", type datetime}
, {"Date Modified", type datetime}
, {"Completion date", type date}
}, "fr-LU")
in
DeclareType
Referenced SourceRaw query, Projects, keeps columns with repeated values for each project and removes dupicates
let
Source = SourceRaw,
SelectColumns = Table.RemoveColumns(Source,{"Industry Name", "Industry", "Quoted to"}),
RemoveDuplicates = Table.Distinct(SelectColumns)
in
RemoveDuplicates
Query Industry pivots projects on them
let
Source = SourceRaw,
SelectColumns = Table.SelectColumns(Source,{"Project Name", "Industry", "Industry Name"}),
ProjectIndustry = Table.Pivot(
SelectColumns,
List.Distinct(SelectColumns[Industry]),
"Industry", "Industry Name", List.Min)
in
ProjectIndustry
Query QutedTo expands quotes to columns
let
Source = SourceRaw,
SelectColumns = Table.SelectColumns(Source,{"Project Name", "Quoted to"}),
NoBlanks = Table.SelectRows(SelectColumns, each ([Quoted to] <> "")),
RemoveDuplicates = Table.Distinct(NoBlanks),
CountRecords = Table.Group(
RemoveDuplicates,
{"Project Name"},
{{"Count", each Table.RowCount(_), Int64.Type}}),
QuoteCount = List.Max(CountRecords[Count]),
names = List.Transform( {1..QuoteCount}, (q) => "Quote to #" & Text.From(q)),
ListQutedTo = Table.Group(
RemoveDuplicates,
{"Project Name"},
{{"Quoted to", each [#"Quoted to"] }}),
CombineQuotedTo = Table.TransformColumns(
ListQutedTo,
{"Quoted to", each Text.Combine(List.Transform(_, Text.From), "="), type text}),
ExpandQuotedTo = Table.SplitColumn(
CombineQuotedTo,
"Quoted to",
Splitter.SplitTextByDelimiter("=", QuoteStyle.Csv),
names)
in
ExpandQuotedTo
Finally merge all togeteher
let
MergeWithIndustry = Table.NestedJoin(
Projects, {"Project Name"},
Industry, {"Project Name"},
"Industry",
JoinKind.LeftOuter),
IndustryNames = List.Skip( Table.ColumnNames(MergeWithIndustry{0}[Industry]) ),
ExpandIndustry = Table.ExpandTableColumn(
MergeWithIndustry,
"Industry",
IndustryNames),
MergeQuotedTo = Table.NestedJoin(
ExpandIndustry, {"Project Name"},
QuotedTo, {"Project Name"},
"QuotedTo",
JoinKind.LeftOuter),
QuotedToNames = List.Skip( Table.ColumnNames(MergeQuotedTo{0}[QuotedTo]) ),
ExpandQuotedTo = Table.ExpandTableColumn(
MergeQuotedTo,
"QuotedTo",
QuotedToNames)
in
ExpandQuotedTo- JeffinoJun 11, 2024Copper Contributor
Thank you all very much! Really appreciate it!
I have used SergeiBaklan response since it is more in line with my capabilities and knowledge
Great community!
- SergeiBaklanJun 11, 2024Diamond Contributor
Jeffino , thank you.
Independently of your choice. Everything could be done in several ways. That's a good approach to select one which is easier in maintenance if only performance is not a showstopper.
- rachelJun 08, 2024Iron ContributorIt is "close to UI" but still has lots of funky syntax....
- SergeiBaklanJun 08, 2024Diamond Contributor
Only 4 manually added lines in all queries, to generate and get intermediate tables column names.