Help with formatting dataframe

So I am trying to use pivot_wider to rearrange this dataframe. The first 100 rows are shown below:

dput(head(dat,100))
structure(list(GENE_PRODUCT_ID = c("A0A7U9C483", "A0A7U9C483", 
"A0A7U9C489", "A0A7U9C490", "A0A7U9C490", "A0A7U9C490", "A0A7U9C490", 
"A0A7U9C490", "A0A7U9C4A2", "A0A7U9C4A2", "A0A7U9C4A2", "A0A7U9C4A2", 
"A0A7U9C4A2", "A0A7U9C4A2", "A0A7U9C4A2", "A0A7U9C4A3", "A0A7U9C4A3", 
"A0A7U9C4A3", "A0A7U9C4A3", "A0A7U9C4A3", "A0A7U9C4A3", "A0A7U9C4A3", 
"A0A7U9C4A3", "A0A7U9C4A3", "A0A7U9C4A3", "A0A7U9C4A3", "A0A7U9C4A4", 
"A0A7U9C4A4", "A0A7U9C4A4", "A0A7U9C4A4", "A0A7U9C4A4", "A0A7U9C4A4", 
"A0A7U9C4A4", "A0A7U9C4A4", "A0A7U9C4B0", "A0A7U9C4B0", "A0A7U9C4B2", 
"A0A7U9C4B2", "A0A7U9C4B2", "A0A7U9C4B2", "A0A7U9C4B2", "A0A7U9C4B2", 
"A0A7U9C4B8", "A0A7U9C4C2", "A0A7U9C4C2", "A0A7U9C4C8", "A0A7U9C4C9", 
"A0A7U9C4C9", "A0A7U9C4D4", "A0A7U9C4D4", "A0A7U9C4D4", "A0A7U9C4D4", 
"A0A7U9C4D4", "A0A7U9C4D4", "A0A7U9C4D4", "A0A7U9C4D4", "A0A7U9C4D4", 
"A0A7U9C4D4", "A0A7U9C4D6", "A0A7U9C4D6", "A0A7U9C4D6", "A0A7U9C4D6", 
"A0A7U9C4D6", "A0A7U9C4D6", "A0A7U9C4D6", "A0A7U9C4E3", "A0A7U9C4E3", 
"A0A7U9C4E3", "A0A7U9C4E3", "A0A7U9C4E3", "A0A7U9C4E3", "A0A7U9C4E6", 
"A0A7U9C4E6", "A0A7U9C4E6", "A0A7U9C4E6", "A0A7U9C4E6", "A0A7U9C4E6", 
"A0A7U9C4E6", "A0A7U9C4E6", "A0A7U9C4E6", "A0A7U9C4E6", "A0A7U9C4E7", 
"A0A7U9C4E7", "A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", 
"A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", 
"A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F1", "A0A7U9C4F3", 
"A0A7U9C4F3", "A0A7U9C4F3", "A0A7U9C4F3"), GO_TERM = c("GO:0036094", 
"GO:0046872", "GO:0016491", "GO:0016020", "GO:0016020", "GO:0005886", 
"GO:0005886", "GO:0005886", "GO:0003677", "GO:0005737", "GO:0003677", 
"GO:0009295", "GO:0043590", "GO:0005737", "GO:0003677", "GO:0008270", 
"GO:0002100", "GO:0016787", "GO:0008251", "GO:0003824", "GO:0046872", 
"GO:0008033", "GO:0016787", "GO:0008270", "GO:0002100", "GO:0052717", 
"GO:0005524", "GO:0006355", "GO:0003677", "GO:0005524", "GO:0005737", 
"GO:0000166", "GO:0003677", "GO:0005737", "GO:0016747", "GO:0016740", 
"GO:0006310", "GO:0015074", "GO:0003677", "GO:0006310", "GO:0003677", 
"GO:0003677", "GO:0016020", "GO:0006355", "GO:0003677", "GO:0016020", 
"GO:0006310", "GO:0006281", "GO:0008276", "GO:0006479", "GO:0016740", 
"GO:0005840", "GO:0005737", "GO:0032259", "GO:0008168", "GO:0005737", 
"GO:0008276", "GO:0005737", "GO:0006457", "GO:0005524", "GO:0051082", 
"GO:0140662", "GO:0005524", "GO:0000166", "GO:0005524", "GO:0016830", 
"GO:0019752", "GO:0030170", "GO:0003824", "GO:0016829", "GO:0004058", 
"GO:0005737", "GO:0003723", "GO:0001514", "GO:0005525", "GO:0003746", 
"GO:0003924", "GO:0006414", "GO:0000166", "GO:0005525", "GO:0003746", 
"GO:0016020", "GO:0016740", "GO:0016740", "GO:0035600", "GO:0035596", 
"GO:0006400", "GO:0051539", "GO:0051536", "GO:0035598", "GO:0003824", 
"GO:0046872", "GO:0016740", "GO:0005737", "GO:0051536", "GO:0051539", 
"GO:0016020", "GO:0008610", "GO:0008654", "GO:0016301"), GO_NAME = c("small molecule binding", 
"metal ion binding", "oxidoreductase activity", "membrane", "membrane", 
"plasma membrane", "plasma membrane", "plasma membrane", "DNA binding", 
"cytoplasm", "DNA binding", "nucleoid", "bacterial nucleoid", 
"cytoplasm", "DNA binding", "zinc ion binding", "tRNA wobble adenosine to inosine editing", 
"hydrolase activity", "tRNA-specific adenosine deaminase activity", 
"catalytic activity", "metal ion binding", "tRNA processing", 
"hydrolase activity", "zinc ion binding", "tRNA wobble adenosine to inosine editing", 
"tRNA-specific adenosine-34 deaminase activity", "ATP binding", 
"regulation of DNA-templated transcription", "DNA binding", "ATP binding", 
"cytoplasm", "nucleotide binding", "DNA binding", "cytoplasm", 
"acyltransferase activity, transferring groups other than amino-acyl groups", 
"transferase activity", "DNA recombination", "DNA integration", 
"DNA binding", "DNA recombination", "DNA binding", "DNA binding", 
"membrane", "regulation of DNA-templated transcription", "DNA binding", 
"membrane", "DNA recombination", "DNA repair", "protein methyltransferase activity", 
"protein methylation", "transferase activity", "ribosome", "cytoplasm", 
"methylation", "methyltransferase activity", "cytoplasm", "protein methyltransferase activity", 
"cytoplasm", "protein folding", "ATP binding", "unfolded protein binding", 
"ATP-dependent protein folding chaperone", "ATP binding", "nucleotide binding", 
"ATP binding", "carbon-carbon lyase activity", "carboxylic acid metabolic process", 
"pyridoxal phosphate binding", "catalytic activity", "lyase activity", 
"aromatic-L-amino-acid decarboxylase activity", "cytoplasm", 
"RNA binding", "selenocysteine incorporation", "GTP binding", 
"translation elongation factor activity", "GTPase activity", 
"translational elongation", "nucleotide binding", "GTP binding", 
"translation elongation factor activity", "membrane", "transferase activity", 
"transferase activity", "tRNA methylthiolation", "methylthiotransferase activity", 
"tRNA modification", "4 iron, 4 sulfur cluster binding", "iron-sulfur cluster binding", 
"N6-threonylcarbomyladenosine methylthiotransferase activity", 
"catalytic activity", "metal ion binding", "transferase activity", 
"cytoplasm", "iron-sulfur cluster binding", "4 iron, 4 sulfur cluster binding", 
"membrane", "lipid biosynthetic process", "phospholipid biosynthetic process", 
"kinase activity"), REFERENCE = c("GO_REF:0000002", "GO_REF:0000043", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000043", "GO_REF:0000043", 
"GO_REF:0000044", "GO_REF:0000104", "GO_REF:0000002", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000044", "GO_REF:0000104", "GO_REF:0000104", 
"GO_REF:0000104", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000043", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000104", "GO_REF:0000104", "GO_REF:0000104", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000044", 
"GO_REF:0000002", "GO_REF:0000043", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000104", 
"GO_REF:0000043", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000043", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000044", "GO_REF:0000104", "GO_REF:0000104", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000104", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000043", 
"GO_REF:0000117", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000043", 
"GO_REF:0000043", "GO_REF:0000043", "GO_REF:0000002", "GO_REF:0000002", 
"GO_REF:0000002", "GO_REF:0000002"), TAXON_ID = c(1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 
1075091, 1075091, 1075091, 1075091, 1075091, 1075091, 1075091
)), row.names = c(NA, -100L), class = c("tbl_df", "tbl", "data.frame"
))

I want to format it into a table similar to the following:

dput(head(dat2))
structure(list(`Accession Number` = c("A0A383V1G7", "A0A383V1H7", 
"A0A383V1J2", "A0A383V1M7", "A0A383V1R6", "A0A383V1S1"), `(3R)-hydroxymyristoyl-[acyl-carrier-protein] dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `(S)-2-(5-amino-1-(5-phospho-D-ribosyl)imidazole-4-carboxamido)succinate AMP-lyase (fumarate-forming) activity` = c("N", 
"N", "N", "N", "N", "N"), `[acyl-carrier-protein] S-malonyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `1-(5-phosphoribosyl)-5-[(5-phosphoribosylamino)methylideneamino]imidazole-4-carboxamide isomerase activity` = c("N", 
"N", "N", "N", "N", "N"), `1,4-alpha-glucan branching enzyme activity` = c("N", 
"N", "N", "N", "N", "N"), `1-alkyl-2-acetylglycerophosphocholine esterase activity` = c("N", 
"N", "N", "N", "N", "N"), `1-deoxy-D-xylulose-5-phosphate reductoisomerase activity` = c("N", 
"N", "N", "N", "N", "N"), `1-deoxy-D-xylulose-5-phosphate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `2 iron, 2 sulfur cluster binding` = c("N", 
"N", "N", "N", "N", "N"), `2-amino-4-hydroxy-6-hydroxymethyldihydropteridine diphosphokinase activity` = c("N", 
"N", "N", "N", "N", "N"), `2-C-methyl-D-erythritol 2,4-cyclodiphosphate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `2-isopropylmalate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `2-methyl-6-phytyl-1,4-benzoquinone methyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `2-phytyl-1,4-naphthoquinone methyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `3 iron, 4 sulfur cluster binding` = c("N", 
"N", "N", "N", "N", "N"), `3'(2'),5'-bisphosphate nucleotidase activity` = c("N", 
"N", "N", "N", "N", "N"), `3,4-dihydroxy-2-butanone-4-phosphate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `3,8-divinyl protochlorophyllide a 8-vinyl reductase activity` = c("N", 
"N", "N", "N", "N", "N"), `3'-5' exonuclease activity` = c("N", 
"N", "N", "N", "N", "N"), `3'-5'-exoribonuclease activity` = c("N", 
"N", "N", "N", "N", "N"), `3-beta-hydroxy-delta5-steroid dehydrogenase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-dehydroquinate dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-dehydroquinate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-deoxy-7-phosphoheptulonate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-hydroxyacyl-[acyl-carrier-protein] dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-hydroxybutyrate dehydrogenase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-hydroxydecanoyl-[acyl-carrier-protein] dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-hydroxyoctanoyl-[acyl-carrier-protein] dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-hydroxypalmitoyl-[acyl-carrier-protein] dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-isopropylmalate dehydrogenase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-oxoacyl-[acyl-carrier-protein] reductase (NADPH) activity` = c("N", 
"N", "N", "N", "N", "N"), `3-oxoacyl-[acyl-carrier-protein] synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-oxo-pimeloyl-[acp] methyl ester reductase activity` = c("N", 
"N", "N", "N", "N", "N"), `3-phosphoshikimate 1-carboxyvinyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `4 iron, 4 sulfur cluster binding` = c("N", 
"N", "N", "N", "N", "N"), `4-(cytidine 5'-diphospho)-2-C-methyl-D-erythritol kinase activity` = c("N", 
"N", "N", "N", "N", "N"), `4-alpha-glucanotransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `4-alpha-hydroxytetrahydrobiopterin dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `4-hydroxy-3-methylbut-2-en-1-yl diphosphate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `4-hydroxy-4-methyl-2-oxoglutarate aldolase activity` = c("N", 
"N", "N", "N", "N", "N"), `4-hydroxyphenylpyruvate dioxygenase activity` = c("N", 
"N", "N", "N", "N", "N"), `4-hydroxy-tetrahydrodipicolinate reductase` = c("N", 
"N", "N", "N", "N", "N"), `4-hydroxy-tetrahydrodipicolinate synthase` = c("N", 
"N", "N", "N", "N", "N"), `5'-3' DNA helicase activity` = c("N", 
"N", "N", "N", "N", "N"), `5'-3' exonuclease activity` = c("N", 
"N", "N", "N", "N", "N"), `5'-3' RNA polymerase activity` = c("N", 
"N", "N", "N", "N", "N"), `5'-deoxynucleotidase activity` = c("N", 
"N", "N", "N", "N", "N"), `5-methyltetrahydropteroyltriglutamate-homocysteine S-methyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `5-methyltetrahydropteroyltri-L-glutamate-dependent methyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `5S rRNA binding` = c("N", "N", "N", 
"N", "N", "N"), `6,7-dimethyl-8-ribityllumazine synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `6-phosphofructokinase activity` = c("N", 
"N", "N", "N", "N", "N"), `6-phosphogluconolactonase activity` = c("N", 
"N", "N", "N", "N", "N"), `7S RNA binding` = c("N", "N", "N", 
"N", "N", "N"), `acetate kinase activity` = c("N", "N", "N", 
"N", "N", "N"), `acetate-CoA ligase activity` = c("N", "N", "N", 
"N", "N", "N"), `acetolactate synthase activity` = c("N", "N", 
"N", "N", "N", "N"), `acetolactate synthase regulator activity` = c("N", 
"N", "N", "N", "N", "N"), `acetyl-CoA carboxylase activity` = c("N", 
"N", "N", "N", "N", "N"), `acetyl-CoA:L-glutamate N-acetyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `acetylglutamate kinase activity` = c("N", 
"N", "N", "N", "N", "N"), `acetyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `acid phosphatase activity` = c("N", 
"N", "N", "N", "N", "N"), `acid-amino acid ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `acid-ammonia (or amide) ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `acid-thiol ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `acireductone dioxygenase [iron(II)-requiring] activity` = c("N", 
"N", "N", "N", "N", "N"), `ACP phosphopantetheine attachment site binding` = c("N", 
"N", "N", "N", "N", "N"), `ACP phosphopantetheine attachment site binding involved in fatty acid biosynthetic process` = c("N", 
"N", "N", "N", "N", "N"), `actin binding` = c("N", "N", "N", 
"N", "N", "N"), `active ion transmembrane transporter activity` = c("N", 
"N", "N", "N", "N", "N"), `active transmembrane transporter activity` = c("N", 
"N", "N", "N", "N", "N"), `acyl-[acyl-carrier-protein] desaturase activity` = c("N", 
"N", "N", "N", "N", "N"), `acyl-CoA dehydrogenase activity` = c("N", 
"N", "N", "N", "N", "N"), `acyl-CoA oxidase activity` = c("N", 
"N", "N", "N", "N", "N"), `acylglycerol O-acyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenine nucleotide transmembrane transporter activity` = c("N", 
"N", "N", "N", "N", "N"), `adenosine kinase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenosylhomocysteinase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenyl nucleotide binding` = c("N", 
"N", "Y", "N", "N", "N"), `adenyl ribonucleotide binding` = c("N", 
"N", "Y", "N", "N", "N"), `adenylate kinase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenyl-nucleotide exchange factor activity` = c("N", 
"N", "N", "N", "N", "N"), `adenylosuccinate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenylylsulfatase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenylylsulfate kinase activity` = c("N", 
"N", "N", "N", "N", "N"), `adenylyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `ADP transmembrane transporter activity` = c("N", 
"N", "N", "N", "N", "N"), `alanine-glyoxylate transaminase activity` = c("N", 
"N", "N", "N", "N", "N"), `alanine-tRNA ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `alcohol dehydrogenase (NAD) activity` = c("N", 
"N", "N", "N", "N", "N"), `aldehyde dehydrogenase (NAD) activity` = c("N", 
"N", "N", "N", "N", "N"), `aldehyde-lyase activity` = c("N", 
"N", "N", "N", "N", "N"), `aliphatic-amine oxidase activity` = c("N", 
"N", "N", "N", "N", "N"), `alkali metal ion binding` = c("N", 
"N", "N", "N", "N", "N"), `alpha,alpha-trehalose-phosphate synthase (UDP-forming) activity` = c("N", 
"N", "N", "N", "N", "N"), `alpha-amylase activity` = c("N", "N", 
"N", "N", "N", "N"), `alpha-galactosidase activity` = c("N", 
"N", "N", "N", "N", "N"), `amide binding` = c("N", "N", "N", 
"N", "N", "N"), `amidine-lyase activity` = c("N", "N", "N", "N", 
"N", "N"), `amidophosphoribosyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `amine-lyase activity` = c("N", "N", 
"N", "N", "N", "N"), `amino acid binding` = c("N", "N", "N", 
"N", "N", "N"), `amino acid kinase activity` = c("N", "N", "N", 
"N", "N", "N"), `aminoacetone:oxygen oxidoreductase(deaminating) activity` = c("N", 
"N", "N", "N", "N", "N"), `aminoacyl-tRNA editing activity` = c("N", 
"N", "N", "N", "N", "N"), `aminoacyl-tRNA ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `aminomethyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `aminopeptidase activity` = c("N", 
"N", "N", "N", "N", "N"), `ammonia ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `ammonia-lyase activity` = c("N", "N", 
"N", "N", "N", "N"), `AMP binding` = c("N", "N", "N", "N", "N", 
"N"), `amylase activity` = c("N", "N", "N", "N", "N", "N"), `anion binding` = c("N", 
"N", "Y", "N", "N", "N"), `anion channel activity` = c("N", "N", 
"N", "N", "N", "N"), `anion transmembrane transporter activity` = c("N", 
"N", "N", "N", "N", "N"), `anion:anion antiporter activity` = c("N", 
"N", "N", "N", "N", "N"), `anthranilate phosphoribosyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `anthranilate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `antioxidant activity` = c("N", "N", 
"N", "N", "N", "N"), `antiporter activity` = c("N", "N", "N", 
"N", "N", "N"), `arginine N-methyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `arginine-tRNA ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `argininosuccinate lyase activity` = c("N", 
"N", "N", "N", "N", "N"), `argininosuccinate synthase activity` = c("N", 
"N", "N", "N", "N", "N"), `arogenate dehydratase activity` = c("N", 
"N", "N", "N", "N", "N"), `asparagine synthase (glutamine-hydrolyzing) activity` = c("N", 
"N", "N", "N", "N", "N"), `asparagine-tRNA ligase activity` = c("N", 
"N", "N", "N", "N", "N"), `asparaginyl-tRNA synthase (glutamine-hydrolyzing) activity` = c("N", 
"N", "N", "N", "N", "N"), `aspartate carbamoyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `aspartate kinase activity` = c("N", 
"N", "N", "N", "N", "N"), `aspartate-semialdehyde dehydrogenase activity` = c("N", 
"N", "N", "N", "N", "N"), `aspartic-type endopeptidase activity` = c("N", 
"N", "N", "N", "N", "N"), `aspartic-type peptidase activity` = c("N", 
"N", "N", "N", "N", "N"), `ATP binding` = c("N", "N", "Y", "N", 
"N", "N"), `ATP citrate synthase activity` = c("N", "N", "N", 
"N", "N", "N"), `ATP phosphoribosyltransferase activity` = c("N", 
"N", "N", "N", "N", "N"), `ATP transmembrane transporter activity` = c("N", 
"N", "N", "N", "N", "N"), `ATP:ADP antiporter activity` = c("N", 
"N", "N", "N", "N", "N"), `ATPase activator activity` = c("N", 
"N", "N", "N", "N", "N"), `ATPase activity` = c("N", "N", "Y", 
"N", "N", "N"), `ATPase activity, coupled` = c("N", "N", "N", 
"N", "N", "N")), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

If the protein on the left matches the GO term, I want that cell to contain either a Y or a 1; if the protein doesn't match that GO term, the cell should contain an N or a 0. I'm thinking it's some sort of pivot_wider command, but I can't quite figure it out. Any insight would be very helpful.

Sorry, I don't understand your data sets at all. Here is the first row of the first data set.

 GENE_PRODUCT_ID    GO_TERM                GO_NAME      REFERENCE TAXON_ID
1      A0A7U9C483 GO:0036094 small molecule binding GO_REF:0000002  1075091

What would it mean for "the protein on the left matches the GO term"?
Here is the second column name in your second data set

`(3R)-hydroxymyristoyl-[acyl-carrier-protein] dehydratase activity`

There is no value in the first data set that is anything like that, so it cannot be the result of a pivot. Where does it come from?

1 Like

So the second data set is an example of what I am trying to make. The accession number in the second spreadsheet is the same kind of identifier as the GENE_PRODUCT_ID in the first, except that the two tables are based on different data. I want to create a table like the second, where each GO_NAME value is a new column. The first column would then be the individual GENE_PRODUCT_ID (accession number). Across each row, if that GENE_PRODUCT_ID is assigned to that GO_NAME, I want there to be a Y in that cell. Some of the GENE_PRODUCT_ID values are assigned to more than one GO_NAME, so there may be multiple Y's across a row. For example, A0A7U9C483 would have a Y in the columns labeled "small molecule binding" and "metal ion binding", but N's in the rest of the columns. Does this make more sense?

I named your original data set DF. The code below makes a data frame named DFwide2 that has one row for each value of DF$GENE_PRODUCT_ID and one column for each value of DF$GO_NAME. The columns contain TRUE if the given GENE_PRODUCT_ID had an entry for that GO_NAME. Does that work for you?

library(tidyr)
library(dplyr)
# Build a one-row-per-protein presence/absence table of GO names.
#
# Input:  DF, the long annotation table; only the columns GENE_PRODUCT_ID
#         and GO_NAME are used.
# Output: DFwide2, with one row per GENE_PRODUCT_ID and one logical column
#         per distinct GO_NAME (TRUE = that protein has an entry for it).
#
# Only the two relevant columns are carried into the pivot. If GO_TERM,
# REFERENCE and TAXON_ID are left in place they act as extra id columns, so
# pivot_wider() returns one mostly-NA row per annotation that then has to be
# collapsed per protein, and fully duplicated rows would make it warn and
# return list-columns.
#
# For "Y"/"N" cells instead of TRUE/FALSE, use Flag = "Y" and
# values_fill = "N" below.
DFwide2 <- DF |>
  # The same protein/GO_NAME pair can occur several times (for example once
  # per REFERENCE); keep a single copy so every output cell is a scalar.
  distinct(GENE_PRODUCT_ID, GO_NAME) |>
  mutate(Flag = TRUE) |>
  pivot_wider(
    names_from = GO_NAME,
    values_from = Flag,
    values_fill = FALSE  # pairs that never occur are absent, not NA
  ) |>
  # One row per protein, ordered by accession.
  arrange(GENE_PRODUCT_ID)

DFwide2[1:3,1:3]
# A tibble: 3 × 3
  GENE_PRODUCT_ID `small molecule binding` `metal ion binding`
  <chr>           <lgl>                    <lgl>              
1 A0A7U9C483      TRUE                     TRUE               
2 A0A7U9C489      FALSE                    FALSE              
3 A0A7U9C490      FALSE                    FALSE      

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.