avantol committed on
Commit
68d7e91
·
1 Parent(s): 800f2c1

fix(app): simplify usage, fix samples

Browse files
Files changed (16) hide show
  1. .secrets.baseline +12 -6
  2. app.py +11 -46
  3. requirements.txt +2 -1
  4. schema_to_sql.py +1 -1
  5. serialized_file_creation_demo/README.md +5 -0
  6. serialized_file_creation_demo/gen3_dm_scaffold.json +0 -0
  7. serialized_file_creation_demo/serialized_file_creation_demo.ipynb +1 -1
  8. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/SDM_0__submitted_genotyping_array.mass_cytometry_image.actionable_mutation__jsonschema_dd.json +598 -1
  9. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/actionable_mutation_metadata.tsv +1 -1
  10. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/aliquot_metadata.tsv +1 -1
  11. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/analyte_metadata.tsv +1 -1
  12. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/case_metadata.tsv +1 -1
  13. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/mass_cytometry_assay_file_manifest.tsv +1 -1
  14. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/mass_cytometry_image_file_manifest.tsv +1 -1
  15. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/submitted_genotyping_array.mass_cytometry_image.actionable_mutation_paths.json +34 -1
  16. serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/submitted_genotyping_array_file_manifest.tsv +1 -1
.secrets.baseline CHANGED
@@ -14,10 +14,6 @@
14
  "name": "Base64HighEntropyString",
15
  "limit": 4.5
16
  },
17
- {
18
- "name": "HuggingFaceTokenDetector",
19
- "path": "file://hf_token_plugin.py"
20
- },
21
  {
22
  "name": "BasicAuthDetector"
23
  },
@@ -126,6 +122,16 @@
126
  "path": "detect_secrets.filters.heuristic.is_templated_secret"
127
  }
128
  ],
129
- "results": {},
130
- "generated_at": "2025-07-14T21:35:18Z"
 
 
 
 
 
 
 
 
 
 
131
  }
 
14
  "name": "Base64HighEntropyString",
15
  "limit": 4.5
16
  },
 
 
 
 
17
  {
18
  "name": "BasicAuthDetector"
19
  },
 
122
  "path": "detect_secrets.filters.heuristic.is_templated_secret"
123
  }
124
  ],
125
+ "results": {
126
+ "serialized_file_creation_demo/gen3_dm_scaffold.json": [
127
+ {
128
+ "type": "Hex High Entropy String",
129
+ "filename": "serialized_file_creation_demo/gen3_dm_scaffold.json",
130
+ "hashed_secret": "0bf50a968d39c25aaf2ac4636505adda571f17bd",
131
+ "is_verified": false,
132
+ "line_number": 683
133
+ }
134
+ ]
135
+ },
136
+ "generated_at": "2025-07-21T13:56:51Z"
137
  }
app.py CHANGED
@@ -10,7 +10,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
10
 
11
  from schema_to_sql import dd_to_sql
12
  from utils import (
13
- create_graph_image_from_json,
14
  create_summary_tables,
15
  get_example_ai_model_output,
16
  get_prompt_with_files_uploaded,
@@ -26,7 +25,7 @@ AUTH_TOKEN = os.environ.get("HF_TOKEN", False)
26
  BASE_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
27
  LORA_ADAPTER = "uc-ctds/data-model-curator"
28
 
29
- MAX_RETRY_ATTEMPTS = 1
30
 
31
  print(f"Is CUDA available: {torch.cuda.is_available()}")
32
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
@@ -50,7 +49,7 @@ except Exception:
50
  # continue on so setup instructions load
51
 
52
 
53
- @spaces.GPU(duration=360)
54
  def run_llm_inference(model_prompt):
55
  retry_count = 1
56
 
@@ -125,11 +124,12 @@ def gen_output_from_files_uploaded(filepaths: list[str] = None):
125
  model_response = run_llm_inference(prompt_from_tsv_upload)
126
  model_response_json = json.loads(model_response)
127
 
128
- # Create Graph Network Image
129
- graph_network_img = create_graph_image_from_json(model_response_json)
130
-
131
  # Create SQL Code
132
- sql, validation = dd_to_sql(model_response_json)
 
 
 
 
133
 
134
  # Create Summary Table
135
  nodes_df, properties_df = {}, {}
@@ -138,16 +138,15 @@ def gen_output_from_files_uploaded(filepaths: list[str] = None):
138
  except Exception as exc:
139
  print(f"summary table creation failed: {exc}")
140
 
141
- return model_response, graph_network_img, sql, nodes_df, properties_df
142
 
143
 
144
  def gen_output_from_example():
145
  model_response = get_example_ai_model_output()
146
  model_response_json = json.loads(model_response)
147
- graph_network_img = create_graph_image_from_json(model_response_json)
148
  sql, validation = dd_to_sql(model_response_json)
149
 
150
- return model_response, graph_network_img, sql
151
 
152
 
153
  def zip_tsvs():
@@ -166,36 +165,6 @@ def zip_tsvs():
166
  with gr.Blocks() as demo:
167
  gr.Markdown("# Demonstration of Llama Data Model Generator")
168
 
169
- gr.Markdown("## IMPORTANT Setup")
170
-
171
- gr.Markdown(
172
- "This demonstrates usage of our [Llama Data Model Generator](https://huggingface.co/uc-ctds/llama-data-model-generator). "
173
- "We fine-tuned the base [Llama 3.1 8B Instruct model](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct), "
174
- "so you must be approved to access it to use this space. Please follow the previous links and gain access to the "
175
- "gated models before proceeding."
176
- )
177
-
178
- gr.Markdown(
179
- "After gaining access, you must **duplicate this space** and add a secret variable HF_TOKEN in the settings. "
180
- "See [Official Huggingface docs](https://huggingface.co/docs/hub/security-tokens) on how to generate a token. "
181
- "It should only have `read` access. Note: this is due to a limitation in Huggingface Spaces and end-user "
182
- "access to gated models."
183
- )
184
-
185
- gr.Image("duplicate.png", label="How to duplicate this space")
186
-
187
- gr.Markdown(
188
- "Ensure you set your duplicated space to **private** and enter your HF_TOKEN. See below: "
189
- )
190
-
191
- gr.Image("duplicate_dialog.png", label="Duplicate instructions")
192
-
193
- gr.Markdown("**IMPORTANT:** Only continue after doing the above.")
194
-
195
- gr.Markdown("## Already do the above? Is this your duplicated space?")
196
-
197
- gr.Markdown("Awesome! Let's test this out!")
198
-
199
  gr.Markdown("## (Optional) Get Sample TSV(s) to Upload")
200
 
201
  gr.Markdown("### Example 1: A single TSV")
@@ -245,23 +214,19 @@ with gr.Blocks() as demo:
245
  container=True,
246
  )
247
 
248
- with gr.Row():
249
- with gr.Column(scale=7):
250
- graph_out = gr.Image(label="Network Graph Representation", type="pil")
251
-
252
  # If files are uploaded, generate prompt and run model
253
  if model_loaded:
254
  files.upload(
255
  fn=gen_output_from_files_uploaded,
256
  inputs=files,
257
- outputs=[json_out, graph_out, sql_out],
258
  )
259
 
260
  gr.Markdown("Run out of FreeGPU or having issues? Try the example output!")
261
  demo_btn = gr.Button("Manually Load Example Output from Previous Run")
262
  demo_btn.click(
263
  fn=gen_output_from_example,
264
- outputs=[json_out, graph_out, sql_out],
265
  )
266
 
267
  if __name__ == "__main__":
 
10
 
11
  from schema_to_sql import dd_to_sql
12
  from utils import (
 
13
  create_summary_tables,
14
  get_example_ai_model_output,
15
  get_prompt_with_files_uploaded,
 
25
  BASE_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
26
  LORA_ADAPTER = "uc-ctds/data-model-curator"
27
 
28
+ MAX_RETRY_ATTEMPTS = 3
29
 
30
  print(f"Is CUDA available: {torch.cuda.is_available()}")
31
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
49
  # continue on so setup instructions load
50
 
51
 
52
+ @spaces.GPU(duration=450)
53
  def run_llm_inference(model_prompt):
54
  retry_count = 1
55
 
 
124
  model_response = run_llm_inference(prompt_from_tsv_upload)
125
  model_response_json = json.loads(model_response)
126
 
 
 
 
127
  # Create SQL Code
128
+ try:
129
+ sql, validation = dd_to_sql(model_response_json)
130
+ except Exception:
131
+ print(f"Errors converting to SQL, skipping...")
132
+ sql = ""
133
 
134
  # Create Summary Table
135
  nodes_df, properties_df = {}, {}
 
138
  except Exception as exc:
139
  print(f"summary table creation failed: {exc}")
140
 
141
+ return model_response, sql, nodes_df, properties_df
142
 
143
 
144
  def gen_output_from_example():
145
  model_response = get_example_ai_model_output()
146
  model_response_json = json.loads(model_response)
 
147
  sql, validation = dd_to_sql(model_response_json)
148
 
149
+ return model_response, sql
150
 
151
 
152
  def zip_tsvs():
 
165
  with gr.Blocks() as demo:
166
  gr.Markdown("# Demonstration of Llama Data Model Generator")
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  gr.Markdown("## (Optional) Get Sample TSV(s) to Upload")
169
 
170
  gr.Markdown("### Example 1: A single TSV")
 
214
  container=True,
215
  )
216
 
 
 
 
 
217
  # If files are uploaded, generate prompt and run model
218
  if model_loaded:
219
  files.upload(
220
  fn=gen_output_from_files_uploaded,
221
  inputs=files,
222
+ outputs=[json_out, sql_out],
223
  )
224
 
225
  gr.Markdown("Run out of FreeGPU or having issues? Try the example output!")
226
  demo_btn = gr.Button("Manually Load Example Output from Previous Run")
227
  demo_btn.click(
228
  fn=gen_output_from_example,
229
+ outputs=[json_out, sql_out],
230
  )
231
 
232
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- torch==2.5.1
2
  transformers==4.50.0
3
  pydantic==2.10.6
4
  gradio==5.35.0
@@ -8,3 +7,5 @@ vllm==0.6.4.post1
8
  peft
9
  Pillow
10
  spaces
 
 
 
 
1
  transformers==4.50.0
2
  pydantic==2.10.6
3
  gradio==5.35.0
 
7
  peft
8
  Pillow
9
  spaces
10
+ --extra-index-url https://download.pytorch.org/whl/cu113
11
+ torch==2.5.1
schema_to_sql.py CHANGED
@@ -135,7 +135,7 @@ def generate_create_table(node, table_lookup):
135
  parent_cols = table_lookup.get(parent, {})
136
  if parent_field in parent_cols:
137
  fk_constraints.append(
138
- f' FOREIGN KEY ("{sql_col}") REFERENCES {parent}("{ref_col}")'
139
  )
140
  else:
141
  fk_constraints.append(
 
135
  parent_cols = table_lookup.get(parent, {})
136
  if parent_field in parent_cols:
137
  fk_constraints.append(
138
+ f' FOREIGN KEY ("{sql_col}") REFERENCES "{parent}"("{ref_col}")'
139
  )
140
  else:
141
  fk_constraints.append(
serialized_file_creation_demo/README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Serialized File Creation Demo
2
+
3
+ [This Jupyter notebook](./serialized_file_creation_demo.ipynb) demonstrates how to convert the AI-assisted data model output (originally just a collection of TSV files) into a serialized file, a [PFB (Portable Format for Bioinformatics)](https://pmc.ncbi.nlm.nih.gov/articles/PMC10035862/) file.
4
+
5
+ PFB is widely used within NIH-funded initiatives that our center is a part of, as a means for efficient storage and transfer of data between systems.
serialized_file_creation_demo/gen3_dm_scaffold.json CHANGED
The diff for this file is too large to render. See raw diff
 
serialized_file_creation_demo/serialized_file_creation_demo.ipynb CHANGED
@@ -280,7 +280,7 @@
280
  "pfb_data = os.path.join(sdm_dir, Path(out_file).stem + \"_data.avro\")\n",
281
  "!pfb from -o $pfb_data json -s $pfb_schema --program DEV --project test $sdm_dir/tsv_metadata\n",
282
  "if Path(pfb_data).exists():\n",
283
- " print(f\"PFB containing TSV files written to:\\n{pfb_data}.\")"
284
  ]
285
  },
286
  {
 
280
  "pfb_data = os.path.join(sdm_dir, Path(out_file).stem + \"_data.avro\")\n",
281
  "!pfb from -o $pfb_data json -s $pfb_schema --program DEV --project test $sdm_dir/tsv_metadata\n",
282
  "if Path(pfb_data).exists():\n",
283
+ " print(f\"PFB containing TSV files written to:\\n{pfb_data}\")"
284
  ]
285
  },
286
  {
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/SDM_0__submitted_genotyping_array.mass_cytometry_image.actionable_mutation__jsonschema_dd.json CHANGED
@@ -1 +1,598 @@
1
- Invalid username or password.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nodes": [
3
+ {
4
+ "name": "project",
5
+ "description": "Any specifically defined piece of work that is undertaken or attempted to meet a single requirement. (NCIt C47885)",
6
+ "links": [],
7
+ "required": [
8
+ "availability_type",
9
+ "project.id"
10
+ ],
11
+ "properties": [
12
+ {
13
+ "name": "address",
14
+ "description": "",
15
+ "type": "string"
16
+ },
17
+ {
18
+ "name": "availability_type",
19
+ "description": "Is the project open or restricted?",
20
+ "type": "enum"
21
+ },
22
+ {
23
+ "name": "brief_summary",
24
+ "description": "A short description of the clinical study, including a brief statement of the clinical study's hypothesis, written in language intended for the lay public.",
25
+ "type": "string"
26
+ },
27
+ {
28
+ "name": "collaborators",
29
+ "description": "Other organizations (if any) providing support. Support may include funding, design, implementation, data analysis or reporting. The responsible party is responsible for confirming all collaborators before listing them.",
30
+ "type": "string"
31
+ },
32
+ {
33
+ "name": "coverage",
34
+ "description": "The spatial or temporal topic of the resource, the spatial applicability of the resource, or the jurisdiction under which the resource is relevant. Spatial topic and spatial applicability may be a named place or a location specified by its geographic coordinates. Temporal topic may be a named period, date, or date range. A jurisdiction may be a named administrative entity or a geographic place to which the resource applies. Recommended best practice is to use a controlled vocabulary such as the Thesaurus of Geographic Names [TGN] (http://www.getty.edu/research/tools/vocabulary/tgn/index.html). Where appropriate, named places or time periods can be used in preference to numeric identifiers such as sets of coordinates or date ranges.",
35
+ "type": "string"
36
+ },
37
+ {
38
+ "name": "data_contributor",
39
+ "description": "The name of the organization or individual that the contributed dataset belongs to.",
40
+ "type": "string"
41
+ },
42
+ {
43
+ "name": "data_type",
44
+ "description": "The general classification of the approach used for the study, i.e. GSA, GDA, RNA-seq.",
45
+ "type": "array"
46
+ },
47
+ {
48
+ "name": "data_url_doi",
49
+ "description": "A URL or DOI for the source of the dataset or the contributing organization's website.",
50
+ "type": "string"
51
+ },
52
+ {
53
+ "name": "disclaimer",
54
+ "description": "The disclaimers that are needed to use the following dataset outside of its source location.",
55
+ "type": "string"
56
+ },
57
+ {
58
+ "name": "estimated_study_completion",
59
+ "description": "The estimated date that the study will be completed/published.",
60
+ "type": "string"
61
+ },
62
+ {
63
+ "name": "institution",
64
+ "description": "Public or Private entity, including Government Agencies.",
65
+ "type": "array"
66
+ },
67
+ {
68
+ "name": "primary_site",
69
+ "description": "The primary body site studied in this dataset.",
70
+ "type": "string"
71
+ },
72
+ {
73
+ "name": "project.id",
74
+ "description": "A unique identifier for records in this 'project' table.",
75
+ "type": "string"
76
+ },
77
+ {
78
+ "name": "project_sponsor",
79
+ "description": "the name of an agency, institution, consortium, or other body that oversees the projects and resources. For academic programs that center around a lab or individual use the department or consortium name.",
80
+ "type": "string"
81
+ },
82
+ {
83
+ "name": "protocol",
84
+ "description": "If a JCOIN hub study, the category of study as defined by Ducharme et al., 2021. Journal of Substance Abuse Treatment publication.",
85
+ "type": "enum"
86
+ },
87
+ {
88
+ "name": "release_requested",
89
+ "description": "User requests that the GDC release the project. Release can only be requested if the project is releasable.",
90
+ "type": "boolean"
91
+ },
92
+ {
93
+ "name": "release_status",
94
+ "description": "Release status of the study.",
95
+ "type": "enum"
96
+ },
97
+ {
98
+ "name": "research_program",
99
+ "description": "Name of the NIH-registered Research Program.",
100
+ "type": "string"
101
+ },
102
+ {
103
+ "name": "submission_enabled",
104
+ "description": "Indicates if submission to a project is allowed.",
105
+ "type": "boolean"
106
+ },
107
+ {
108
+ "name": "support_id",
109
+ "description": "The ID of the source providing support/grant resources.",
110
+ "type": "string"
111
+ }
112
+ ]
113
+ },
114
+ {
115
+ "name": "study",
116
+ "description": "A coordinated set of actions and observations designed to generate data, with the ultimate goal of discovery or hypothesis testing.",
117
+ "links": [
118
+ "project"
119
+ ],
120
+ "required": [
121
+ "project.id",
122
+ "study.id",
123
+ "study_description"
124
+ ],
125
+ "properties": [
126
+ {
127
+ "name": "data_description",
128
+ "description": "Brief description of the data being provided for this study. Free text",
129
+ "type": "string"
130
+ },
131
+ {
132
+ "name": "study_completeness",
133
+ "description": "Description of data status. 0=Descriptive data and results as originally received from the data provider. 1=Includes updates to the original data submission short of completeness. 2=Complete set of descriptive data and results, as ascertained by curator.",
134
+ "type": "enum"
135
+ },
136
+ {
137
+ "name": "study_description",
138
+ "description": "A brief description of the study being performed. Free text",
139
+ "type": "string"
140
+ },
141
+ {
142
+ "name": "study_doi",
143
+ "description": "Digital object identifier (DOI) is a type of persistent identifier used to uniquely identify objects",
144
+ "type": "string"
145
+ },
146
+ {
147
+ "name": "study_organization",
148
+ "description": "Name of the primary organization that oversees implementation of the study",
149
+ "type": "string"
150
+ },
151
+ {
152
+ "name": "project.id",
153
+ "description": "Unique identifiers for records in the 'project' table that relate via this foreign key to records in this 'study' table.",
154
+ "type": "string"
155
+ },
156
+ {
157
+ "name": "study.id",
158
+ "description": "A unique identifier for records in this 'study' table.",
159
+ "type": "string"
160
+ },
161
+ {
162
+ "name": "data_url_doi",
163
+ "description": "A URL or DOI for the source of the dataset or the contributing organization's website.",
164
+ "type": "string"
165
+ },
166
+ {
167
+ "name": "full_name",
168
+ "description": "The full name or title of the dataset or publication.",
169
+ "type": "string"
170
+ }
171
+ ]
172
+ },
173
+ {
174
+ "name": "case",
175
+ "description": "The collection of all data related to a specific subject in the context of a specific project.",
176
+ "links": [
177
+ "project",
178
+ "study"
179
+ ],
180
+ "required": [
181
+ "case.id",
182
+ "project.id"
183
+ ],
184
+ "properties": [
185
+ {
186
+ "name": "AnchorDate",
187
+ "description": "The reference or anchor date used during date obfuscation, where a single date is obscurred by creating one or more date ranges in relation to this date.",
188
+ "type": "enum"
189
+ },
190
+ {
191
+ "name": "project.id",
192
+ "description": "Unique identifiers for records in the 'project' table that relate via this foreign key to records in this 'case' table.",
193
+ "type": "string"
194
+ },
195
+ {
196
+ "name": "case.id",
197
+ "description": "A unique identifier for records in this 'case' table.",
198
+ "type": "string"
199
+ },
200
+ {
201
+ "name": "cohort_id",
202
+ "description": "Cohort identifier",
203
+ "type": "integer"
204
+ },
205
+ {
206
+ "name": "cohort_name",
207
+ "description": "Cohort name",
208
+ "type": "string"
209
+ },
210
+ {
211
+ "name": "geographic_location",
212
+ "description": "Location of experiment.",
213
+ "type": "string"
214
+ },
215
+ {
216
+ "name": "index_date",
217
+ "description": "The reference or anchor date used during date obfuscation, where a single date is obscurred by creating one or more date ranges in relation to this date.",
218
+ "type": "enum"
219
+ },
220
+ {
221
+ "name": "species",
222
+ "description": "Taxonomic species of the subject.",
223
+ "type": "enum"
224
+ },
225
+ {
226
+ "name": "study.id",
227
+ "description": "Unique identifiers for records in the 'study' table that relate via this foreign key to records in this 'case' table.",
228
+ "type": "string"
229
+ }
230
+ ]
231
+ },
232
+ {
233
+ "name": "cell_subject",
234
+ "description": "description of subject.",
235
+ "links": [
236
+ "study",
237
+ "case"
238
+ ],
239
+ "required": [
240
+ "cell_subject.id"
241
+ ],
242
+ "properties": [
243
+ {
244
+ "name": "cell_type",
245
+ "description": "",
246
+ "type": "string"
247
+ },
248
+ {
249
+ "name": "isolation_protocol",
250
+ "description": "",
251
+ "type": "string"
252
+ },
253
+ {
254
+ "name": "karyotype",
255
+ "description": "",
256
+ "type": "string"
257
+ },
258
+ {
259
+ "name": "provenance",
260
+ "description": "",
261
+ "type": "string"
262
+ },
263
+ {
264
+ "name": "source_organ",
265
+ "description": "",
266
+ "type": "string"
267
+ },
268
+ {
269
+ "name": "study.id",
270
+ "description": "Unique identifiers for records in the 'study' table that relate via this foreign key to records in this 'cell_subject' table.",
271
+ "type": "string"
272
+ },
273
+ {
274
+ "name": "case.id",
275
+ "description": "Unique identifiers for records in the 'case' table that relate via this foreign key to records in this 'cell_case' table.",
276
+ "type": "string"
277
+ },
278
+ {
279
+ "name": "cell_subject.id",
280
+ "description": "A unique identifier for records in this 'cell_subject' table.",
281
+ "type": "string"
282
+ }
283
+ ]
284
+ },
285
+ {
286
+ "name": "actionable_mutation",
287
+ "description": "Mutations extracted from a patient's sequencing results.",
288
+ "links": [
289
+ "case"
290
+ ],
291
+ "required": [
292
+ "case.id",
293
+ "actionable_mutation.id"
294
+ ],
295
+ "properties": [
296
+ {
297
+ "name": "ClinicallyActionable",
298
+ "description": "ClinicallyActionable",
299
+ "type": "number"
300
+ },
301
+ {
302
+ "name": "DaysFromAnchorDateToBxResultsDate",
303
+ "description": "DaysFromAnchorDateToBxResultsDate",
304
+ "type": "integer"
305
+ },
306
+ {
307
+ "name": "Lab",
308
+ "description": "Lab",
309
+ "type": "string"
310
+ },
311
+ {
312
+ "name": "MutantFraction",
313
+ "description": "MutantFraction",
314
+ "type": "number"
315
+ },
316
+ {
317
+ "name": "TrialPhase",
318
+ "description": "TrialPhase",
319
+ "type": "number"
320
+ },
321
+ {
322
+ "name": "case.id",
323
+ "description": "Unique identifiers for records in the 'case' table that relate via this foreign key to records in this 'actionable_mutation' table.",
324
+ "type": "string"
325
+ },
326
+ {
327
+ "name": "actionable_mutation.id",
328
+ "description": "A unique identifier for records in this 'actionable_mutation' table.",
329
+ "type": "string"
330
+ }
331
+ ]
332
+ },
333
+ {
334
+ "name": "sample",
335
+ "description": "description of sample.",
336
+ "links": [
337
+ "case",
338
+ "cell_subject"
339
+ ],
340
+ "required": [
341
+ "sample.id"
342
+ ],
343
+ "properties": [
344
+ {
345
+ "name": "provenance",
346
+ "description": "template version",
347
+ "type": "string"
348
+ },
349
+ {
350
+ "name": "weight",
351
+ "description": "The weight of the patient measured in kilograms.",
352
+ "type": "number"
353
+ },
354
+ {
355
+ "name": "case.id",
356
+ "description": "Unique identifiers for records in the 'case' table that relate via this foreign key to records in this 'sample' table.",
357
+ "type": "string"
358
+ },
359
+ {
360
+ "name": "cell_subject.id",
361
+ "description": "Unique identifiers for records in the 'cell_subject' table that relate via this foreign key to records in this 'sample' table.",
362
+ "type": "string"
363
+ },
364
+ {
365
+ "name": "sample.id",
366
+ "description": "A unique identifier for records in this 'sample' table.",
367
+ "type": "string"
368
+ }
369
+ ]
370
+ },
371
+ {
372
+ "name": "aliquot",
373
+ "description": "Pertaining to a portion of the whole; any one of two or more samples of something, of the same volume or weight.",
374
+ "links": [
375
+ "sample"
376
+ ],
377
+ "required": [
378
+ "sample.id",
379
+ "aliquot.id"
380
+ ],
381
+ "properties": [
382
+ {
383
+ "name": "a260_a280_ratio",
384
+ "description": "Numeric value that represents the sample ratio of nucleic acid absorbance at 260 nm and 280 nm, used to determine a measure of DNA purity.",
385
+ "type": "number"
386
+ },
387
+ {
388
+ "name": "derivitization",
389
+ "description": "",
390
+ "type": "string"
391
+ },
392
+ {
393
+ "name": "provenance",
394
+ "description": "template version",
395
+ "type": "string"
396
+ },
397
+ {
398
+ "name": "sample.id",
399
+ "description": "Unique identifiers for records in the 'sample' table that relate via this foreign key to records in this 'aliquot' table.",
400
+ "type": "string"
401
+ },
402
+ {
403
+ "name": "aliquot.id",
404
+ "description": "A unique identifier for records in this 'aliquot' table.",
405
+ "type": "string"
406
+ }
407
+ ]
408
+ },
409
+ {
410
+ "name": "submitted_genotyping_array",
411
+ "description": "Data file containing raw data from a genotyping array.",
412
+ "links": [
413
+ "aliquot"
414
+ ],
415
+ "required": [
416
+ "submitted_genotyping_array.id",
417
+ "file_name",
418
+ "data_category",
419
+ "aliquot.id"
420
+ ],
421
+ "properties": [
422
+ {
423
+ "name": "data_category",
424
+ "description": "Broad categorization of the contents of the data file.",
425
+ "type": "enum"
426
+ },
427
+ {
428
+ "name": "aliquot.id",
429
+ "description": "Unique identifiers for records in the 'aliquot' table that relate via this foreign key to records in this 'submitted_genotyping_array' table.",
430
+ "type": "string"
431
+ },
432
+ {
433
+ "name": "file_name",
434
+ "description": "The name (or part of a name) of a file (of any type).",
435
+ "type": "string"
436
+ },
437
+ {
438
+ "name": "submitted_genotyping_array.id",
439
+ "description": "A unique identifier for records in this 'submitted_genotyping_array' table.",
440
+ "type": "string"
441
+ }
442
+ ]
443
+ },
444
+ {
445
+ "name": "analyte",
446
+ "description": "Any aspect of an aliquot used in an analysis or assay to characterize the sample. These aspects range from molecules, such as DNA and RNA, that can be extracted from the aliquot to general descriptions of the aliquot's components, such as cell count and morphology.",
447
+ "links": [
448
+ "aliquot",
449
+ "study"
450
+ ],
451
+ "required": [
452
+ "analyte_type",
453
+ "analyte_isolation_method",
454
+ "analyte.id"
455
+ ],
456
+ "properties": [
457
+ {
458
+ "name": "analyte_isolation_method",
459
+ "description": "The name or general description of the method used to isolate the analyte. Alternatively, if you have provided a protocol, put the file_name here.",
460
+ "type": "string"
461
+ },
462
+ {
463
+ "name": "analyte_type",
464
+ "description": "Text term that represents the kind of molecular specimen analyte.",
465
+ "type": "enum"
466
+ },
467
+ {
468
+ "name": "frame_identifier",
469
+ "description": "In an analysis of a slide, the frame denotes the region of the slide that is being examined. Within a frame are multiple cells.",
470
+ "type": "string"
471
+ },
472
+ {
473
+ "name": "run_identifier",
474
+ "description": "The identifier given to the run during which this particular analyte was tested or evaluated. If you analyze multiple analytes through the same experimentation run, this is a good way to keep track.",
475
+ "type": "string"
476
+ },
477
+ {
478
+ "name": "specificity_other",
479
+ "description": "If the true negative rate is something other than a number (for example, 'WT'), enter the text here.",
480
+ "type": "string"
481
+ },
482
+ {
483
+ "name": "aliquot.id",
484
+ "description": "Unique identifiers for records in the 'aliquot' table that relate via this foreign key to records in this 'analyte' table.",
485
+ "type": "string"
486
+ },
487
+ {
488
+ "name": "study.id",
489
+ "description": "Unique identifiers for records in the 'study' table that relate via this foreign key to records in this 'analyte' table.",
490
+ "type": "string"
491
+ },
492
+ {
493
+ "name": "analyte.id",
494
+ "description": "A unique identifier for records in this 'analyte' table.",
495
+ "type": "string"
496
+ }
497
+ ]
498
+ },
499
+ {
500
+ "name": "mass_cytometry_assay",
501
+ "description": "Mass cytometry is a variation of flow cytometry in which antibodies are labeled with heavy metal ion tags rather than fluorochromes. Readout is by time-of-flight mass spectrometry. This allows for the combination of many more antibody specificities in a single samples, without significant spillover between channels.",
502
+ "links": [
503
+ "analyte"
504
+ ],
505
+ "required": [
506
+ "mass_cytometry_assay.id",
507
+ "analyte.id",
508
+ "assay_method",
509
+ "md5sum"
510
+ ],
511
+ "properties": [
512
+ {
513
+ "name": "assay_method",
514
+ "description": "General methodology used to perform the assay.",
515
+ "type": "enum"
516
+ },
517
+ {
518
+ "name": "md5sum",
519
+ "description": "The 128-bit hash value expressed as a 32 digit hexadecimal number used as a file's digital fingerprint.",
520
+ "type": "string"
521
+ },
522
+ {
523
+ "name": "protocol_used",
524
+ "description": "The name or general description of the protocol used to run the mass cytometry assay. Alternatively, if you have provided a protocol, enter its file_name here.",
525
+ "type": "string"
526
+ },
527
+ {
528
+ "name": "analyte.id",
529
+ "description": "Unique identifiers for records in the 'analyte' table that relate via this foreign key to records in this 'mass_cytometry_assay' table.",
530
+ "type": "string"
531
+ },
532
+ {
533
+ "name": "mass_cytometry_assay.id",
534
+ "description": "A unique identifier for records in this 'mass_cytometry_assay' table.",
535
+ "type": "string"
536
+ }
537
+ ]
538
+ },
539
+ {
540
+ "name": "mass_cytometry_image",
541
+ "description": "Following an imaging mass cytometry experiment, the raw data output can be converted into antibody-specific images.",
542
+ "links": [
543
+ "mass_cytometry_assay"
544
+ ],
545
+ "required": [
546
+ "assay_target",
547
+ "file_name",
548
+ "mass_cytometry_assay.id",
549
+ "data_type",
550
+ "data_category",
551
+ "data_format",
552
+ "mass_cytometry_image.id"
553
+ ],
554
+ "properties": [
555
+ {
556
+ "name": "assay_target",
557
+ "description": "Target for the assay: can be a specific gene, protein, or otherwise.",
558
+ "type": "string"
559
+ },
560
+ {
561
+ "name": "data_category",
562
+ "description": "Broad categorization of the contents of the data file.",
563
+ "type": "enum"
564
+ },
565
+ {
566
+ "name": "data_format",
567
+ "description": "Format of the data files.",
568
+ "type": "enum"
569
+ },
570
+ {
571
+ "name": "data_type",
572
+ "description": "Specific content type of the data file.",
573
+ "type": "enum"
574
+ },
575
+ {
576
+ "name": "file_name",
577
+ "description": "The name (or part of a name) of a file (of any type).",
578
+ "type": "string"
579
+ },
580
+ {
581
+ "name": "object_id",
582
+ "description": "The GUID of the object in the index service.",
583
+ "type": "string"
584
+ },
585
+ {
586
+ "name": "mass_cytometry_assay.id",
587
+ "description": "Unique identifiers for records in the 'mass_cytometry_assay' table that relate via this foreign key to records in this 'mass_cytometry_image' table.",
588
+ "type": "string"
589
+ },
590
+ {
591
+ "name": "mass_cytometry_image.id",
592
+ "description": "A unique identifier for records in this 'mass_cytometry_image' table.",
593
+ "type": "string"
594
+ }
595
+ ]
596
+ }
597
+ ]
598
+ }
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/actionable_mutation_metadata.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ actionable_mutation.id ClinicallyActionable DaysFromAnchorDateToBxResultsDate Lab MutantFraction TrialPhase case.id
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/aliquot_metadata.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ aliquot.id a260_a280_ratio cell_type derivitization isolation_protocol karyotype provenance source_organ weight study.id case.id cell_subject.id sample.id
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/analyte_metadata.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ analyte.id analyte_isolation_method analyte_type frame_identifier run_identifier specificity_other study.id aliquot.id
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/case_metadata.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ case.id AnchorDate address availability_type brief_summary cohort_id cohort_name collaborators coverage data_contributor data_description data_type data_url_doi disclaimer estimated_study_completion full_name geographic_location index_date institution primary_site project_sponsor protocol release_requested release_status research_program species study_completeness study_description study_doi study_organization submission_enabled support_id project.id study.id
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/mass_cytometry_assay_file_manifest.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ mass_cytometry_assay.id assay_method md5sum protocol_used analyte.id
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/mass_cytometry_image_file_manifest.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ mass_cytometry_image.id assay_target data_category data_format data_type file_name object_id mass_cytometry_assay.id
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/submitted_genotyping_array.mass_cytometry_image.actionable_mutation_paths.json CHANGED
@@ -1 +1,34 @@
1
- Invalid username or password.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "submitted_genotyping_array": [
3
+ "project",
4
+ "study",
5
+ "case",
6
+ "cell_subject",
7
+ "sample",
8
+ "aliquot"
9
+ ],
10
+ "mass_cytometry_image": [
11
+ "project",
12
+ "study",
13
+ "case",
14
+ "cell_subject",
15
+ "sample",
16
+ "aliquot",
17
+ "analyte",
18
+ "mass_cytometry_assay"
19
+ ],
20
+ "actionable_mutation": [
21
+ "project",
22
+ "study",
23
+ "case"
24
+ ],
25
+ "mass_cytometry_assay": [
26
+ "project",
27
+ "study",
28
+ "case",
29
+ "cell_subject",
30
+ "sample",
31
+ "aliquot",
32
+ "analyte"
33
+ ]
34
+ }
serialized_file_creation_demo/submitted_genotyping_array.mass_cytometry_image.actionable_mutation/submitted_genotyping_array_file_manifest.tsv CHANGED
@@ -1 +1 @@
1
- Invalid username or password.
 
1
+ submitted_genotyping_array.id data_category file_name aliquot.id