shwetashweta05 committed on
Commit
16e390d
·
verified ·
1 Parent(s): 17326a7

Update pages/6.Data Collection.py

Browse files
Files changed (1) hide show
  1. pages/6.Data Collection.py +75 -0
pages/6.Data Collection.py CHANGED
@@ -75,3 +75,78 @@ if data_type == "Structured":
75
  mime="application/octet-stream",
76
  )
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  mime="application/octet-stream",
76
  )
77
 
78
# CSV Format Content
# Rendered only when the selected format is "CSV". The section explains the
# format, shows how to read it with pandas, lists common pitfalls and their
# fixes, and finally offers a guide notebook for download.
if format_selected == "CSV":
    st.write("#### CSV Format")

    # Part (a) What it is
    st.subheader("What is CSV?")
    st.write("""
    CSV (Comma-Separated Values) is a plain-text file format used to store tabular data,
    where each row corresponds to a record, and fields are separated by commas.
    It is widely used for data exchange due to its simplicity and compatibility across systems.
    Common file extensions include `.csv`.
    """)

    # Part (b) How to Read These Files
    st.subheader("How to Read CSV Files?")
    st.code("""
    import pandas as pd
    # Reading a CSV file
    df = pd.read_csv("file.csv")
    print(df.head())

    # Reading a CSV file with custom delimiter
    df = pd.read_csv("file.csv", sep=";")
    """)

    # Part (c) Issues Encountered
    st.subheader("Common Issues Encountered When Handling CSV Files")
    st.write("""
    - **Incorrect Delimiters**: Files may use delimiters other than commas, e.g., semicolons or tabs.
    - **Encoding Problems**: Files with different encodings (e.g., UTF-8, ISO-8859-1) may cause errors.
    - **Missing or Corrupted Data**: Blank fields or inconsistencies in data.
    - **Header Issues**: Missing headers or extra/unexpected columns.
    - **Large File Sizes**: Memory limitations when processing large datasets.
    """)

    # Part (d) How to Overcome These Issues
    # The fenced snippets are indented under their bullet so each one renders
    # as part of the list item it belongs to.
    st.subheader("How to Overcome These Issues?")
    st.write("""
    - **Incorrect Delimiters**: Specify the correct delimiter when reading:
      ```python
      df = pd.read_csv("file.csv", sep=";")
      ```
    - **Encoding Problems**: Specify the encoding explicitly:
      ```python
      df = pd.read_csv("file.csv", encoding="utf-8")
      ```
    - **Missing or Corrupted Data**: Handle missing values using pandas:
      ```python
      df.fillna("NA", inplace=True)
      ```
    - **Header Issues**: Assign custom headers or skip problematic rows:
      ```python
      df = pd.read_csv("file.csv", header=None)
      df.columns = ["Column1", "Column2", "Column3"]
      ```
    - **Large Files**: Use chunk processing for large files:
      ```python
      chunks = pd.read_csv("file.csv", chunksize=1000)
      for chunk in chunks:
          process(chunk)
      ```
    """)

    # Downloadable Guide Button
    st.markdown("### Download Coding Guide:")

    # The download button is rendered directly instead of behind st.button:
    # a plain button is only truthy on the rerun caused by its own click, so
    # a nested download button needs a second click and then disappears.
    guide_path = "CSV_guide.ipynb"  # Replace with the actual file path
    try:
        # Read the notebook as raw bytes so the widget gets plain data rather
        # than a file handle that is closed when the `with` block exits.
        with open(guide_path, "rb") as guide_file:
            guide_bytes = guide_file.read()
    except FileNotFoundError:
        # A missing guide should not take down the rest of the page.
        st.warning(f"The CSV guide notebook ({guide_path}) is not available.")
    else:
        st.download_button(
            label="Download CSV Guide",
            data=guide_bytes,
            file_name="CSV_guide.ipynb",
            mime="application/octet-stream",
        )