import pandas as pd
import re

def _normalize_header(label):
    """Return *label* lower-cased with every space replaced by an underscore."""
    return re.sub(' ', '_', label.lower())


# Parameter lookup table (tab-separated). After header normalisation the key
# and name columns are renamed to the names used by the later merges.
df_param = pd.read_csv('Parms.txt', sep='\t')
df_param = df_param.rename(columns=_normalize_header)
df_param = df_param.rename(
    columns={'parm_code': 'parameter_cd', 'name': 'parameter_name'}
)

# Unit lookup table (tab-separated): only the code and its abbreviation are
# kept, renamed to 'unit_cd' / 'unit_abbr'.
df_unit = pd.read_csv('Units.txt', sep='\t')
df_unit = df_unit.rename(columns=_normalize_header)
df_unit = df_unit[['code', 'abbr']]
df_unit = df_unit.rename(columns={'code': 'unit_cd', 'abbr': 'unit_abbr'})

# Site lookup table (tab-separated), keyed by 'aqs_cd' after the rename.
df_site = pd.read_csv('Site.txt', sep='\t')
df_site = df_site.rename(columns=_normalize_header)
df_site = df_site.rename(columns={'aqs_code': 'aqs_cd'})

# Method lookup table (comma-separated). Only the columns at positions 1..5
# are kept; the selection is positional, so it depends on the column order
# of 'methods_all.csv'.
df_meth = pd.read_csv('methods_all.csv')
df_meth = df_meth.rename(columns=_normalize_header)
df_meth = df_meth.rename(
    columns={'parameter_code': 'parameter_cd', 'method_code': 'meth_cd'}
)
df_meth = df_meth.iloc[:, 1:6]




# Layout shared by the three pipe-delimited export files read below.
_EXPORT_SKIPPED_LINES = 10   # lines skipped before the header row
_EXPORT_KEPT_COLUMNS = 13    # only the leading columns are used


def _read_export(fname):
    """Read one pipe-delimited export file and add a combined site key.

    The first ``_EXPORT_SKIPPED_LINES`` lines of the file are skipped, column
    headers are lower-cased with spaces replaced by underscores, and only the
    first ``_EXPORT_KEPT_COLUMNS`` columns are kept.

    An ``aqs_cd`` column is appended, computed as
    ``(1000*state_cd + county_cd)*10000 + site_id``. This packs the three
    codes into one integer provided county_cd < 1000 and site_id < 10000
    (assumed, not checked here).
    """
    raw = pd.read_csv(fname, sep='|', skiprows=_EXPORT_SKIPPED_LINES)
    raw = raw.rename(columns=lambda x: re.sub(' ', '_', x.lower()))
    return raw.iloc[:, :_EXPORT_KEPT_COLUMNS].assign(
        aqs_cd=lambda x: (1000*x.state_cd + x.county_cd)*10000 + x.site_id
    )


def _found_parameters(kind, df_obs):
    """Return the distinct parameter/unit/method combinations in *df_obs*.

    Each combination is joined with the module-level lookup tables
    ``df_param``, ``df_unit`` and ``df_meth``. The merges use pandas'
    defaults (inner join on every column name the two frames share), so a
    combination missing from any lookup table is dropped from the result.
    A leading ``kind`` column holding *kind* is inserted.
    """
    combos = df_obs[['parameter_cd', 'unit_cd', 'meth_cd']].drop_duplicates()
    meta = combos.merge(df_param).merge(df_unit).merge(df_meth)
    meta.insert(0, 'kind', kind)
    return meta


def _found_sites(kind, df_obs):
    """Return the distinct sites in *df_obs* joined with ``df_site``.

    The merge uses pandas' defaults (inner join on shared column names), so
    sites absent from ``df_site`` are dropped. A leading ``kind`` column
    holding *kind* is inserted.
    """
    sites = df_obs[['aqs_cd']].drop_duplicates().merge(df_site)
    sites.insert(0, 'kind', kind)
    return sites


fname_autogc = 'autogc_2023_bpa_338120240528175131246.txt'
df_autogc = _read_export(fname_autogc)
df_meta_autogc = _found_parameters('autogc', df_autogc)
df_site_autogc = _found_sites('autogc', df_autogc)

fname_canister = 'canister_2023_bpa_338120240528181742488.txt'
df_canister = _read_export(fname_canister)
df_meta_canister = _found_parameters('canister', df_canister)
df_site_canister = _found_sites('canister', df_canister)

fname_met = 'met_2023_bpa_338120240528210422183.txt'
df_met = _read_export(fname_met)
df_meta_met = _found_parameters('met', df_met)
df_site_met = _found_sites('met', df_met)

# Stack the per-kind parameter summaries into one table with a fresh
# 0..n-1 row index, then write it out without the index column.
df_param_found = pd.concat(
    [df_meta_autogc, df_meta_canister, df_meta_met],
    ignore_index=True,
)
df_param_found.to_csv('parameters_found.csv', index=False)

# Same treatment for the per-kind site summaries.
df_site_found = pd.concat(
    [df_site_autogc, df_site_canister, df_site_met],
    ignore_index=True,
)
df_site_found.to_csv('sites_found.csv', index=False)







