read_cabrillo

This is how to load a Cabrillo file for analysis.

import pandas as pd


from dataclasses import dataclass
@dataclass
class CabRecord:
    """One QSO line of a Cabrillo log, broken into its columns.

    The fields are declared in the same order as the columns of the QSO
    line, so a record can be built positionally from the sliced columns.
    The dataclass does not convert or validate anything: every attribute
    holds exactly the value that was passed in.
    """

    freq: float    # frequency column
    mode: str      # mode column
    when: str      # date and time columns, kept together
    mycall: str    # info sent: callsign
    myrst: str     # info sent: signal report
    myexch: str    # info sent: exchange
    dxcall: str    # info received: callsign
    dxrst: str     # info received: signal report
    dxexch: str    # info received: exchange
    txnum: int     # trailing transmitter column



def read_cab(filename):
    """Return the contents of the text file *filename* as a list of lines.

    The whole file is read in one go and split on the newline character,
    so the returned strings carry no line terminator, and a file that ends
    with a newline produces a final empty string.
    """
    with open(filename, 'rt') as log_file:
        contents = log_file.read()

    # Split after the file is closed; the text is already fully in memory.
    return contents.split('\n')

def only_qso(data):
    """Return the entries of *data* that begin with ``QSO``, in order."""
    kept = []
    for line in data:
        # Header/footer lines and blanks do not start with the QSO tag.
        if line.startswith('QSO'):
            kept.append(line)
    return kept

def make_records(qso_data):
    """Split fixed-width Cabrillo QSO lines into ``CabRecord`` objects.

    Each line is cut at fixed column positions following the layout below
    (the last two rows are a 1-based column ruler: tens digit above units):

                                  --------info sent------- -------info rcvd--------
    QSO: freq  mo date       time call          rst exch   call          rst exch   t
    QSO: ***** ** yyyy-mm-dd nnnn ************* nnn ****** ************* nnn ****** n
    QSO:  3799 PH 1999-03-06 0711 HC8N          59  001    W1AW          59  001    0
    000000000111111111122222222223333333333444444444455555555556666666666777777777788
    123456789012345678901234567890123456789012345678901234567890123456789012345678901

    Args:
        qso_data: iterable of QSO line strings.

    Returns:
        A list with one ``CabRecord`` per input line. Every field is the
        whitespace-stripped text of its column; no type conversion is done.
        A line shorter than the layout yields empty strings for the columns
        it does not reach.
    """
    # Half-open, 0-based (start, stop) slice for each CabRecord field, in
    # field order. Ruler column c corresponds to string index c - 1.
    fields = [
        (5, 10),    # freq
        (11, 13),   # mode
        (14, 29),   # date and time together -> when
        (30, 43),   # sent call
        (44, 47),   # sent rst: three columns (45-47), same width as rcvd rst
        (48, 54),   # sent exchange
        (55, 68),   # rcvd call
        (69, 72),   # rcvd rst
        (73, 79),   # rcvd exchange
        (80, 81),   # transmitter number
    ]

    return [
        CabRecord(*(line[start:stop].strip() for start, stop in fields))
        for line in qso_data
    ]

# Pipeline: read the log file -> keep only the QSO lines -> parse each line
# into a CabRecord.
records = make_records(
    only_qso(
        read_cab("2022 ARRL International DX Contest CW.cbr")
    )
)

# One DataFrame row per record, one column per CabRecord field.
df = pd.DataFrame(records)

And to see the first 10 records

# head() returns only 5 rows by default; ask for 10 explicitly.
df.head(10)
freq mode when mycall myrst myexch dxcall dxrst dxexch txnum
0 21022 CW 2022-02-19 0001 DV3A 59 400 K5ZG 599 CO 0
1 21030 CW 2022-02-19 0002 DV3A 59 400 K4PV 599 FL 0
2 21023 CW 2022-02-19 0003 DV3A 59 400 W8TK 599 AZ 0
3 21040 CW 2022-02-19 0004 DV3A 59 400 W7CXX 599 UT 0
4 21034 CW 2022-02-19 0005 DV3A 59 400 K6SRZ 599 CA 0
5 21025 CW 2022-02-19 0007 DV3A 59 400 N9NA 599 AZ 0
6 21032 CW 2022-02-19 0009 DV3A 59 400 N8OO 599 LA 0
7 21028 CW 2022-02-19 0009 DV3A 59 400 WC7Q 599 WA 0
8 21004 CW 2022-02-19 0010 DV3A 59 400 W0PR 599 MN 0
9 21041 CW 2022-02-19 0011 DV3A 59 400 KE6GLA 599 CA 0

Add extra info

At this point we can import some more utilities to generate additional data columns.

from ham.dxcc import DxccAll
from ham.band import HamBand

# The frequency column was parsed as text; turn it into floats.
# Prefixing '0' makes an empty field convert to 0.0 instead of raising.
df['freq'] = df['freq'].apply(lambda raw: float('0' + raw))

# DXCC lookup object, used below to resolve a callsign to its country.
dx = DxccAll()

def get_country(call):
    """Return the country name that ``dx.find`` reports for *call*.

    Falls back to an empty string when the lookup result is falsy or its
    ``Country_Name`` is falsy.
    """
    match = dx.find(call)
    if not match:
        return ""
    return match.Country_Name or ""

# Mostly a sanity check: this was an ARRL contest, so the country column is
# not expected to tell us much.
df['Country'] = df['dxcall'].apply(get_country)

And let's look at the first 10 records again:

df.head(10)

'

freq mode when mycall myrst myexch dxcall dxrst dxexch txnum Country
0 21022.0 CW 2022-02-19 0001 DV3A 59 400 K5ZG 599 CO 0 United States
1 21030.0 CW 2022-02-19 0002 DV3A 59 400 K4PV 599 FL 0 United States
2 21023.0 CW 2022-02-19 0003 DV3A 59 400 W8TK 599 AZ 0 United States
3 21040.0 CW 2022-02-19 0004 DV3A 59 400 W7CXX 599 UT 0 United States
4 21034.0 CW 2022-02-19 0005 DV3A 59 400 K6SRZ 599 CA 0 United States
5 21025.0 CW 2022-02-19 0007 DV3A 59 400 N9NA 599 AZ 0 United States
6 21032.0 CW 2022-02-19 0009 DV3A 59 400 N8OO 599 LA 0 United States
7 21028.0 CW 2022-02-19 0009 DV3A 59 400 WC7Q 599 WA 0 United States
8 21004.0 CW 2022-02-19 0010 DV3A 59 400 W0PR 599 MN 0 United States
9 21041.0 CW 2022-02-19 0011 DV3A 59 400 KE6GLA 599 CA 0 United States
'

You can see we added the Country name - All United States (except a few people who did not understand the contest rules).

Which is the easiest state for me to work?

Let's see the top 5.

As the exchange is the state, we will create a new column, for clarity.

# Use their exchange as the State (copied into its own column for clarity).
df['State'] = df.dxexch
# Count contacts per State and keep the five most frequent. State stays as
# the index; as_index is a groupby() option, not an agg() one, so it is not
# passed here.
df.groupby(by=['State']).agg({'dxexch': 'count'}).sort_values(['dxexch'], ascending=False)[:5]

And I see

'

dxexch
State
CA 79
WA 36
AZ 29
OR 23
BC 21
'

And the least frequent?

# Same grouping, sorted ascending to get the five least frequent States.
# as_index is a groupby() option, not an agg() one, so it is not passed here.
df.groupby(by=['State']).agg({'dxexch': 'count'}).sort_values(['dxexch'], ascending=True)[:5]

'

dxexch
State
SD 1
QC 1
ON 1
OK 1
'
  • South Dakota
  • Quebec
  • Ontario
  • etc

That all looks/sounds about correct