Apr 15 2021 02:02 AM
Hi,
I've setup a query to read the content of a Table inside several Excel files in a folder.
My problem is that those files are quite big (3MB+ each) and when the folder contains many files the process is getting really slow.
Is there a way to speed things up in M code?
This my origin query:
let
Origine = Folder.Files("C:\Users\...myPath....\myFolder")
in
Origine
and this is the query I use to extract the content of the tables:
let
Origine = ODPcartellatutti_i_file,
#"Filtrate righe1" = Table.SelectRows(Origine, each Text.Contains([Extension], "xls") or Text.Contains([Extension], "XLS")),
#"Rimosse colonne" = Table.RemoveColumns(#"Filtrate righe1",{"Date accessed", "Date modified", "Date created", "Folder Path"}),
#"Aggiunta colonna personalizzata" = Table.AddColumn(#"Rimosse colonne", "Tabella_Riepilogo", each try Excel.Workbook([Content],null, true) otherwise ""),
#"Tabella Tabella_Riepilogo espansa" = Table.ExpandTableColumn(#"Aggiunta colonna personalizzata", "Tabella_Riepilogo", {"Name", "Data", "Item"}, {"Tabella_Riepilogo.Name", "Tabella_Riepilogo.Data", "Tabella_Riepilogo.Item"}),
#"Filtrate righe" = Table.SelectRows(#"Tabella Tabella_Riepilogo espansa", each try ([Tabella_Riepilogo.Name] = "TAB_ODP") otherwise ""),
#"Tabella Tabella_Riepilogo.Data espansa" = Table.ExpandTableColumn(#"Filtrate righe", "Tabella_Riepilogo.Data", {"Cliente", "Oggetto", "Materiale", "Data ordine", "Data consegna", "Autore", "Importo", "Acconto", "Saldo", "TAGLIA-B.", "DISCO-FILO", "FILO-SAG", "TELAIO", "WATER JET", "INTERMAC", "OMAG/GMM", "FRESE", "VARIE"}, {"Tabella_Riepilogo.Data.Cliente", "Tabella_Riepilogo.Data.Oggetto", "Tabella_Riepilogo.Data.Materiale", "Tabella_Riepilogo.Data.Data ordine", "Tabella_Riepilogo.Data.Data consegna", "Tabella_Riepilogo.Data.Autore", "Tabella_Riepilogo.Data.Importo", "Tabella_Riepilogo.Data.Acconto", "Tabella_Riepilogo.Data.Saldo", "Tabella_Riepilogo.Data.TAGLIA-B.", "Tabella_Riepilogo.Data.DISCO-FILO", "Tabella_Riepilogo.Data.FILO-SAG", "Tabella_Riepilogo.Data.TELAIO", "Tabella_Riepilogo.Data.WATER JET", "Tabella_Riepilogo.Data.INTERMAC", "Tabella_Riepilogo.Data.OMAG/GMM", "Tabella_Riepilogo.Data.FRESE", "Tabella_Riepilogo.Data.VARIE"}),
#"Rimosse colonne1" = Table.RemoveColumns(#"Tabella Tabella_Riepilogo.Data espansa",{"Extension", "Attributes", "Tabella_Riepilogo.Name"}),
#"Rinominate colonne" = Table.RenameColumns(#"Rimosse colonne1",{{"Tabella_Riepilogo.Data.Cliente", "Nome cliente"}, {"Tabella_Riepilogo.Data.Materiale", "Materiale"}, {"Tabella_Riepilogo.Data.Data ordine", "Data ordine"}, {"Tabella_Riepilogo.Data.Data consegna", "Data consegna"}, {"Tabella_Riepilogo.Data.Autore", "Autore"}, {"Tabella_Riepilogo.Data.Importo", "Importo netto"}, {"Tabella_Riepilogo.Data.Acconto", "Acconto netto"}, {"Tabella_Riepilogo.Data.Saldo", "Dalso netto"}, {"Tabella_Riepilogo.Data.Oggetto", "Oggetto"}}),
#"Modificato tipo" = Table.TransformColumnTypes(#"Rinominate colonne",{{"Importo netto", Currency.Type}, {"Dalso netto", Int64.Type}, {"Acconto netto", Int64.Type}}),
#"Rimosse colonne2" = Table.RemoveColumns(#"Modificato tipo",{"Tabella_Riepilogo.Item", "Content"}),
#"Rinominate colonne1" = Table.RenameColumns(#"Rimosse colonne2",{{"Name", "Name File"}, {"Dalso netto", "Saldo netto"}})
in
#"Rinominate colonne1"
where I think this line of code is slowing things down
#"Aggiunta colonna personalizzata" = Table.AddColumn(#"Rimosse colonne", "Tabella_Riepilogo", each try Excel.Workbook([Content],null, true) otherwise ""),
because is getting all Excel files content and then in the next steps I'm filtering the rows that contain the table name "TAB_ODP" which is what I'm looking for.
Is there a way to directly get the table (if exists in the file) instead of getting ALL the content and then filtering?
Many thanks in advanced!
Apr 15 2021 09:03 AM
Apr 15 2021 09:29 AM
@mathetesThere's no VBa here, just pure M Language in PwerQuery. I do need to be able to pull data from closed excel files, so FILTER function is not right for this.
Apr 15 2021 01:31 PM
Perhaps something like this, at least for the part in question
let
Origine = ODPcartellatutti_i_file,
#"Filtrate righe1" = Table.SelectRows(
Origine,
each
Text.Contains([Extension], "xls") or
Text.Contains([Extension], "XLS")
),
#"Rimosse colonne" = Table.RemoveColumns(
#"Filtrate righe1",
{"Date accessed", "Date modified", "Date created", "Folder Path"}
),
#"Aggiunta colonna personalizzata" = Table.AddColumn(
#"Rimosse colonne",
"Tabella_Riepilogo",
Excel.Workbook([Content],null, true)
),
#"Removed Other Columns" = Table.SelectColumns(
#"Aggiunta colonna personalizzata",
{"Tabella_Riepilogo"}
),
#"Removed Errors" = Table.RemoveRowsWithErrors(
#"Removed Other Columns",
{"Tabella_Riepilogo"}
),
#"Tabella Tabella_Riepilogo espansa" = Table.ExpandTableColumn(
#"Removed Errors",
"Tabella_Riepilogo",
{"Name", "Data", "Item", "Kind"},
{"Tabella_Riepilogo.Name", "Tabella_Riepilogo.Data", "Tabella_Riepilogo.Item"}
),
#"Filtrate righe" = Table.SelectRows(
#"Tabella Tabella_Riepilogo espansa",
each ([Kind] = "Table") and ([Name] = "TAB_ODP")
),
#"Removed Other Columns1" = Table.SelectColumns(
#"Filtrate righe",
{"Data"}
),
#"Tabella Tabella_Riepilogo.Data espansa" = Table.ExpandTableColumn(
#"Removed Other Columns1",
"Tabella_Riepilogo.Data",
{"Cliente", "Oggetto", "Materiale", "Data ordine", "Data consegna",
"Autore", "Importo", "Acconto", "Saldo", "TAGLIA-B.", "DISCO-FILO",
"FILO-SAG", "TELAIO", "WATER JET", "INTERMAC", "OMAG/GMM",
"FRESE", "VARIE"},
{
"Nome cliente",
"Oggetto",
"Materiale",
"Data ordine",
"Data consegna",
"Autore",
"Importo netto",
"Acconto netto",
"Dalso netto",
"Tabella_Riepilogo.Data.TAGLIA-B.",
"Tabella_Riepilogo.Data.DISCO-FILO",
"Tabella_Riepilogo.Data.FILO-SAG",
"Tabella_Riepilogo.Data.TELAIO",
"Tabella_Riepilogo.Data.WATER JET",
"Tabella_Riepilogo.Data.INTERMAC",
"Tabella_Riepilogo.Data.OMAG/GMM",
"Tabella_Riepilogo.Data.FRESE",
"Tabella_Riepilogo.Data.VARIE"
}
),
#"Modificato tipo" = Table.TransformColumnTypes(
#"Tabella Tabella_Riepilogo.Data espansa",
{
{"Importo netto", Currency.Type},
{"Dalso netto", Int64.Type},
{"Acconto netto", Int64.Type}
}
),
#"Rimosse colonne2" = Table.RemoveColumns(
#"Modificato tipo",
{"Tabella_Riepilogo.Item", "Content"}
),
#"Rinominate colonne1" = Table.RenameColumns(
#"Rimosse colonne2",
{
{"Name", "Name File"},
{"Dalso netto", "Saldo netto"}
}
)
in
#"Rinominate colonne1"
Apr 21 2021 08:38 AM