fh-new-vm1

Sleeping

victormiller committed on Sep 25, 2024

Commit

e55aeba

verified ·

1 Parent(s): c58264f

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -128,13 +128,14 @@ intro_text = P(
 intro_list = P("""We present TxT360, the Trillion eXtracted Text corpus, a 5.7T token dataset for pretraining projects that:""")
-intro_list1 = Ol(Li("Curates commonly used pretraining datasets, including all CommonCrawl"),Li("Employs carefully selected filters designed for each data source"))
-intro_1 = P("1. Curates commonly used pretraining datasets, including all CommonCrawl")
-intro_2 = P("2. Employs carefully selected filters designed for each data source")
-intro_3 = P("3. Provides only unique data elements via globally deduplicated across all datasets")
-intro_4 = P("4. Retains all deduplication metadata for custom upweighting")
-intro_5 = P("5. Is Production ready! Download here [link to HF repo]")
 previous_background =  P(
                 """ The quality and size of a pre-training dataset

 intro_list = P("""We present TxT360, the Trillion eXtracted Text corpus, a 5.7T token dataset for pretraining projects that:""")
+intro_list1 = Ol(
+                Li("Curates commonly used pretraining datasets, including all CommonCrawl"),
+                Li("Employs carefully selected filters designed for each data source"),
+                Li("Provides only unique data elements via globally deduplicated across all datasets"),
+                Li("Retains all deduplication metadata for custom upweighting"),
+                Li("Is Production ready! Download here [link to HF repo]")
+)
 previous_background =  P(
                 """ The quality and size of a pre-training dataset