<?xml version="1.0" ?>
<BioSampleSet><BioSample access="public" publication_date="2023-01-03T00:00:00.000" last_update="2023-04-12T12:03:14.000" submission_date="2023-01-04T08:31:27.690" id="32561007" accession="SAMEA14084379">   <Ids>     <Id db="BioSample" is_primary="1">SAMEA14084379</Id>     <Id db="SRA">ERS11687512</Id>   </Ids>   <Description>     <Title>Metagenome-assembled genome: ERR414448_bin.20_CONCOCT_v1.1_MAG</Title>     <Organism taxonomy_id="165190" taxonomy_name="uncultured Collinsella sp.">       <OrganismName>uncultured Collinsella sp.</OrganismName>     </Organism>     <Comment>       <Paragraph>This sample represents a Third Party Annotation (TPA) Metagenome-Assembled Genome (MAG) assembled from the metagenomic run ERR414448 of study ERP004605.</Paragraph>     </Comment>   </Description>   <Owner>     <Name>EBI</Name>   </Owner>   <Models>     <Model>Generic</Model>   </Models>   <Package display_name="Generic">Generic.1.0</Package>   <Attributes>     <Attribute attribute_name="ENA-CHECKLIST">ERC000047</Attribute>     <Attribute attribute_name="ENA-FIRST-PUBLIC">2023-01-03</Attribute>     <Attribute attribute_name="ENA-LAST-UPDATE">2023-01-03</Attribute>     <Attribute attribute_name="External Id">SAMEA14084379</Attribute>     <Attribute attribute_name="INSDC center alias">EBI</Attribute>     <Attribute attribute_name="INSDC center name">European Bioinformatics Institute</Attribute>     <Attribute attribute_name="INSDC first public">2023-01-03T00:33:41Z</Attribute>     <Attribute attribute_name="INSDC last update">2023-01-03T00:33:41Z</Attribute>     <Attribute attribute_name="INSDC status">public</Attribute>     <Attribute attribute_name="Submitter Id">ERR414448_bin.20_CONCOCT_v1.1_MAG</Attribute>     <Attribute attribute_name="assembly quality">Many fragments with little to no review of assembly other than reporting of standard assembly statistics</Attribute>     <Attribute attribute_name="assembly software">spades_v3.11.1</Attribute>     <Attribute attribute_name="binning parameters">Default</Attribute>     <Attribute attribute_name="binning software">CONCOCT v1.1</Attribute>     <Attribute attribute_name="broker name">EMG broker account, EMBL-EBI</Attribute>     <Attribute attribute_name="collection date" harmonized_name="collection_date" display_name="collection date">2008-01-01</Attribute>     <Attribute attribute_name="completeness score">89.74</Attribute>     <Attribute attribute_name="completeness software">CheckM</Attribute>     <Attribute attribute_name="contamination score">3.61</Attribute>     <Attribute attribute_name="environment (biome)" harmonized_name="env_broad_scale" display_name="broad-scale environmental context">Host-associated</Attribute>     <Attribute attribute_name="environment (feature)" harmonized_name="env_local_scale" display_name="local-scale environmental context">Human</Attribute>     <Attribute attribute_name="environment (material)" harmonized_name="env_medium" display_name="environmental medium">Digestive system</Attribute>     <Attribute attribute_name="geographic location (country and/or sea)" harmonized_name="geo_loc_name" display_name="geographic location">Spain</Attribute>     <Attribute attribute_name="geographic location (latitude)">40.416634</Attribute>     <Attribute attribute_name="geographic location (longitude)">-3.7037659</Attribute>     <Attribute attribute_name="investigation type" harmonized_name="investigation_type" display_name="investigation type">metagenome-assembled genome</Attribute>     <Attribute attribute_name="isolation_source" harmonized_name="isolation_source" display_name="isolation source">human gut metagenome</Attribute>     <Attribute attribute_name="metagenomic source">human gut metagenome</Attribute>     <Attribute attribute_name="project name" harmonized_name="project_name" display_name="project name">The gut microbiota is key to human health and disease. Metagenome-wide association studies (MGWAS) that search for disease markers in the gut microbiota, species identification according to metagenomic linkage groups (MLGs) or metagenomic clusters (MGCs), and metatranscriptomics or metaproteomics studies, all depend on a reference gene catalog, which has only been available for individual cohorts or based on reference genome or protein sequences. Here we report a high-quality integrated reference gene catalog consisting 9,879,896 genes, using 6.4 TB sequencing data derived from 1267 published and unpublished human gut metagenomes from three continents. The catalog represents a comprehensive collection of common and rare species, genes and genetic variants, and suggests individuality in the human gut microbiota. Analyses of a group of Chinese and Danish samples using the catalog revealed country-specific signatures in nutrient and xenobiotic metabolism. Our data suggest that interventions on nutrition, pollution and epidemiology should be tailored to the gut microbiota of a given population or even personalized for an individual.</Attribute>     <Attribute attribute_name="sample derived from">SAMEA2338818</Attribute>     <Attribute attribute_name="sample name" harmonized_name="sample_name" display_name="sample name">ERR414448_bin.20_CONCOCT_v1.1_MAG</Attribute>     <Attribute attribute_name="scientific_name">uncultured Collinsella sp.</Attribute>     <Attribute attribute_name="sequencing method">Illumina HiSeq 2000</Attribute>     <Attribute attribute_name="taxonomic identity marker">multi-marker approach</Attribute>   </Attributes>   <Status status="live" when="2023-01-05T08:29:28.520"/> </BioSample> </BioSampleSet>
